compile.cpp revision 6761:739468857ffb
1/*
2 * Copyright (c) 1997, 2014, Oracle and/or its affiliates. All rights reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20 * or visit www.oracle.com if you need additional information or have any
21 * questions.
22 *
23 */
24
25#include "precompiled.hpp"
26#include "asm/macroAssembler.hpp"
27#include "asm/macroAssembler.inline.hpp"
28#include "ci/ciReplay.hpp"
29#include "classfile/systemDictionary.hpp"
30#include "code/exceptionHandlerTable.hpp"
31#include "code/nmethod.hpp"
32#include "compiler/compileBroker.hpp"
33#include "compiler/compileLog.hpp"
34#include "compiler/disassembler.hpp"
35#include "compiler/oopMap.hpp"
36#include "opto/addnode.hpp"
37#include "opto/block.hpp"
38#include "opto/c2compiler.hpp"
39#include "opto/callGenerator.hpp"
40#include "opto/callnode.hpp"
41#include "opto/cfgnode.hpp"
42#include "opto/chaitin.hpp"
43#include "opto/compile.hpp"
44#include "opto/connode.hpp"
45#include "opto/divnode.hpp"
46#include "opto/escape.hpp"
47#include "opto/idealGraphPrinter.hpp"
48#include "opto/loopnode.hpp"
49#include "opto/machnode.hpp"
50#include "opto/macro.hpp"
51#include "opto/matcher.hpp"
52#include "opto/mathexactnode.hpp"
53#include "opto/memnode.hpp"
54#include "opto/mulnode.hpp"
55#include "opto/narrowptrnode.hpp"
56#include "opto/node.hpp"
57#include "opto/opcodes.hpp"
58#include "opto/output.hpp"
59#include "opto/parse.hpp"
60#include "opto/phaseX.hpp"
61#include "opto/rootnode.hpp"
62#include "opto/runtime.hpp"
63#include "opto/stringopts.hpp"
64#include "opto/type.hpp"
65#include "opto/vectornode.hpp"
66#include "runtime/arguments.hpp"
67#include "runtime/signature.hpp"
68#include "runtime/stubRoutines.hpp"
69#include "runtime/timer.hpp"
70#include "trace/tracing.hpp"
71#include "utilities/copy.hpp"
72
73
74// -------------------- Compile::mach_constant_base_node -----------------------
75// Constant table base node singleton.
76MachConstantBaseNode* Compile::mach_constant_base_node() {
77  if (_mach_constant_base_node == NULL) {
78    _mach_constant_base_node = new MachConstantBaseNode();
79    _mach_constant_base_node->add_req(C->root());
80  }
81  return _mach_constant_base_node;
82}
83
84
85/// Support for intrinsics.
86
87// Return the index at which m must be inserted (or already exists).
88// The sort order is by the address of the ciMethod, with is_virtual as minor key.
89int Compile::intrinsic_insertion_index(ciMethod* m, bool is_virtual) {
90#ifdef ASSERT
91  for (int i = 1; i < _intrinsics->length(); i++) {
92    CallGenerator* cg1 = _intrinsics->at(i-1);
93    CallGenerator* cg2 = _intrinsics->at(i);
94    assert(cg1->method() != cg2->method()
95           ? cg1->method()     < cg2->method()
96           : cg1->is_virtual() < cg2->is_virtual(),
97           "compiler intrinsics list must stay sorted");
98  }
99#endif
100  // Binary search sorted list, in decreasing intervals [lo, hi].
101  int lo = 0, hi = _intrinsics->length()-1;
102  while (lo <= hi) {
103    int mid = (uint)(hi + lo) / 2;
104    ciMethod* mid_m = _intrinsics->at(mid)->method();
105    if (m < mid_m) {
106      hi = mid-1;
107    } else if (m > mid_m) {
108      lo = mid+1;
109    } else {
110      // look at minor sort key
111      bool mid_virt = _intrinsics->at(mid)->is_virtual();
112      if (is_virtual < mid_virt) {
113        hi = mid-1;
114      } else if (is_virtual > mid_virt) {
115        lo = mid+1;
116      } else {
117        return mid;  // exact match
118      }
119    }
120  }
121  return lo;  // inexact match
122}
123
124void Compile::register_intrinsic(CallGenerator* cg) {
125  if (_intrinsics == NULL) {
126    _intrinsics = new (comp_arena())GrowableArray<CallGenerator*>(comp_arena(), 60, 0, NULL);
127  }
128  // This code is stolen from ciObjectFactory::insert.
129  // Really, GrowableArray should have methods for
130  // insert_at, remove_at, and binary_search.
131  int len = _intrinsics->length();
132  int index = intrinsic_insertion_index(cg->method(), cg->is_virtual());
133  if (index == len) {
134    _intrinsics->append(cg);
135  } else {
136#ifdef ASSERT
137    CallGenerator* oldcg = _intrinsics->at(index);
138    assert(oldcg->method() != cg->method() || oldcg->is_virtual() != cg->is_virtual(), "don't register twice");
139#endif
140    _intrinsics->append(_intrinsics->at(len-1));
141    int pos;
142    for (pos = len-2; pos >= index; pos--) {
143      _intrinsics->at_put(pos+1,_intrinsics->at(pos));
144    }
145    _intrinsics->at_put(index, cg);
146  }
147  assert(find_intrinsic(cg->method(), cg->is_virtual()) == cg, "registration worked");
148}
149
150CallGenerator* Compile::find_intrinsic(ciMethod* m, bool is_virtual) {
151  assert(m->is_loaded(), "don't try this on unloaded methods");
152  if (_intrinsics != NULL) {
153    int index = intrinsic_insertion_index(m, is_virtual);
154    if (index < _intrinsics->length()
155        && _intrinsics->at(index)->method() == m
156        && _intrinsics->at(index)->is_virtual() == is_virtual) {
157      return _intrinsics->at(index);
158    }
159  }
160  // Lazily create intrinsics for intrinsic IDs well-known in the runtime.
161  if (m->intrinsic_id() != vmIntrinsics::_none &&
162      m->intrinsic_id() <= vmIntrinsics::LAST_COMPILER_INLINE) {
163    CallGenerator* cg = make_vm_intrinsic(m, is_virtual);
164    if (cg != NULL) {
165      // Save it for next time:
166      register_intrinsic(cg);
167      return cg;
168    } else {
169      gather_intrinsic_statistics(m->intrinsic_id(), is_virtual, _intrinsic_disabled);
170    }
171  }
172  return NULL;
173}
174
175// Compile:: register_library_intrinsics and make_vm_intrinsic are defined
176// in library_call.cpp.
177
178
179#ifndef PRODUCT
180// statistics gathering...
181
182juint  Compile::_intrinsic_hist_count[vmIntrinsics::ID_LIMIT] = {0};
183jubyte Compile::_intrinsic_hist_flags[vmIntrinsics::ID_LIMIT] = {0};
184
185bool Compile::gather_intrinsic_statistics(vmIntrinsics::ID id, bool is_virtual, int flags) {
186  assert(id > vmIntrinsics::_none && id < vmIntrinsics::ID_LIMIT, "oob");
187  int oflags = _intrinsic_hist_flags[id];
188  assert(flags != 0, "what happened?");
189  if (is_virtual) {
190    flags |= _intrinsic_virtual;
191  }
192  bool changed = (flags != oflags);
193  if ((flags & _intrinsic_worked) != 0) {
194    juint count = (_intrinsic_hist_count[id] += 1);
195    if (count == 1) {
196      changed = true;           // first time
197    }
198    // increment the overall count also:
199    _intrinsic_hist_count[vmIntrinsics::_none] += 1;
200  }
201  if (changed) {
202    if (((oflags ^ flags) & _intrinsic_virtual) != 0) {
203      // Something changed about the intrinsic's virtuality.
204      if ((flags & _intrinsic_virtual) != 0) {
205        // This is the first use of this intrinsic as a virtual call.
206        if (oflags != 0) {
207          // We already saw it as a non-virtual, so note both cases.
208          flags |= _intrinsic_both;
209        }
210      } else if ((oflags & _intrinsic_both) == 0) {
211        // This is the first use of this intrinsic as a non-virtual
212        flags |= _intrinsic_both;
213      }
214    }
215    _intrinsic_hist_flags[id] = (jubyte) (oflags | flags);
216  }
217  // update the overall flags also:
218  _intrinsic_hist_flags[vmIntrinsics::_none] |= (jubyte) flags;
219  return changed;
220}
221
222static char* format_flags(int flags, char* buf) {
223  buf[0] = 0;
224  if ((flags & Compile::_intrinsic_worked) != 0)    strcat(buf, ",worked");
225  if ((flags & Compile::_intrinsic_failed) != 0)    strcat(buf, ",failed");
226  if ((flags & Compile::_intrinsic_disabled) != 0)  strcat(buf, ",disabled");
227  if ((flags & Compile::_intrinsic_virtual) != 0)   strcat(buf, ",virtual");
228  if ((flags & Compile::_intrinsic_both) != 0)      strcat(buf, ",nonvirtual");
229  if (buf[0] == 0)  strcat(buf, ",");
230  assert(buf[0] == ',', "must be");
231  return &buf[1];
232}
233
234void Compile::print_intrinsic_statistics() {
235  char flagsbuf[100];
236  ttyLocker ttyl;
237  if (xtty != NULL)  xtty->head("statistics type='intrinsic'");
238  tty->print_cr("Compiler intrinsic usage:");
239  juint total = _intrinsic_hist_count[vmIntrinsics::_none];
240  if (total == 0)  total = 1;  // avoid div0 in case of no successes
241  #define PRINT_STAT_LINE(name, c, f) \
242    tty->print_cr("  %4d (%4.1f%%) %s (%s)", (int)(c), ((c) * 100.0) / total, name, f);
243  for (int index = 1 + (int)vmIntrinsics::_none; index < (int)vmIntrinsics::ID_LIMIT; index++) {
244    vmIntrinsics::ID id = (vmIntrinsics::ID) index;
245    int   flags = _intrinsic_hist_flags[id];
246    juint count = _intrinsic_hist_count[id];
247    if ((flags | count) != 0) {
248      PRINT_STAT_LINE(vmIntrinsics::name_at(id), count, format_flags(flags, flagsbuf));
249    }
250  }
251  PRINT_STAT_LINE("total", total, format_flags(_intrinsic_hist_flags[vmIntrinsics::_none], flagsbuf));
252  if (xtty != NULL)  xtty->tail("statistics");
253}
254
255void Compile::print_statistics() {
256  { ttyLocker ttyl;
257    if (xtty != NULL)  xtty->head("statistics type='opto'");
258    Parse::print_statistics();
259    PhaseCCP::print_statistics();
260    PhaseRegAlloc::print_statistics();
261    Scheduling::print_statistics();
262    PhasePeephole::print_statistics();
263    PhaseIdealLoop::print_statistics();
264    if (xtty != NULL)  xtty->tail("statistics");
265  }
266  if (_intrinsic_hist_flags[vmIntrinsics::_none] != 0) {
267    // put this under its own <statistics> element.
268    print_intrinsic_statistics();
269  }
270}
271#endif //PRODUCT
272
273// Support for bundling info
274Bundle* Compile::node_bundling(const Node *n) {
275  assert(valid_bundle_info(n), "oob");
276  return &_node_bundling_base[n->_idx];
277}
278
279bool Compile::valid_bundle_info(const Node *n) {
280  return (_node_bundling_limit > n->_idx);
281}
282
283
284void Compile::gvn_replace_by(Node* n, Node* nn) {
285  for (DUIterator_Last imin, i = n->last_outs(imin); i >= imin; ) {
286    Node* use = n->last_out(i);
287    bool is_in_table = initial_gvn()->hash_delete(use);
288    uint uses_found = 0;
289    for (uint j = 0; j < use->len(); j++) {
290      if (use->in(j) == n) {
291        if (j < use->req())
292          use->set_req(j, nn);
293        else
294          use->set_prec(j, nn);
295        uses_found++;
296      }
297    }
298    if (is_in_table) {
299      // reinsert into table
300      initial_gvn()->hash_find_insert(use);
301    }
302    record_for_igvn(use);
303    i -= uses_found;    // we deleted 1 or more copies of this edge
304  }
305}
306
307
308static inline bool not_a_node(const Node* n) {
309  if (n == NULL)                   return true;
310  if (((intptr_t)n & 1) != 0)      return true;  // uninitialized, etc.
311  if (*(address*)n == badAddress)  return true;  // kill by Node::destruct
312  return false;
313}
314
315// Identify all nodes that are reachable from below, useful.
316// Use breadth-first pass that records state in a Unique_Node_List,
317// recursive traversal is slower.
318void Compile::identify_useful_nodes(Unique_Node_List &useful) {
319  int estimated_worklist_size = unique();
320  useful.map( estimated_worklist_size, NULL );  // preallocate space
321
322  // Initialize worklist
323  if (root() != NULL)     { useful.push(root()); }
324  // If 'top' is cached, declare it useful to preserve cached node
325  if( cached_top_node() ) { useful.push(cached_top_node()); }
326
327  // Push all useful nodes onto the list, breadthfirst
328  for( uint next = 0; next < useful.size(); ++next ) {
329    assert( next < unique(), "Unique useful nodes < total nodes");
330    Node *n  = useful.at(next);
331    uint max = n->len();
332    for( uint i = 0; i < max; ++i ) {
333      Node *m = n->in(i);
334      if (not_a_node(m))  continue;
335      useful.push(m);
336    }
337  }
338}
339
340// Update dead_node_list with any missing dead nodes using useful
341// list. Consider all non-useful nodes to be useless i.e., dead nodes.
342void Compile::update_dead_node_list(Unique_Node_List &useful) {
343  uint max_idx = unique();
344  VectorSet& useful_node_set = useful.member_set();
345
346  for (uint node_idx = 0; node_idx < max_idx; node_idx++) {
347    // If node with index node_idx is not in useful set,
348    // mark it as dead in dead node list.
349    if (! useful_node_set.test(node_idx) ) {
350      record_dead_node(node_idx);
351    }
352  }
353}
354
355void Compile::remove_useless_late_inlines(GrowableArray<CallGenerator*>* inlines, Unique_Node_List &useful) {
356  int shift = 0;
357  for (int i = 0; i < inlines->length(); i++) {
358    CallGenerator* cg = inlines->at(i);
359    CallNode* call = cg->call_node();
360    if (shift > 0) {
361      inlines->at_put(i-shift, cg);
362    }
363    if (!useful.member(call)) {
364      shift++;
365    }
366  }
367  inlines->trunc_to(inlines->length()-shift);
368}
369
370// Disconnect all useless nodes by disconnecting those at the boundary.
371void Compile::remove_useless_nodes(Unique_Node_List &useful) {
372  uint next = 0;
373  while (next < useful.size()) {
374    Node *n = useful.at(next++);
375    if (n->is_SafePoint()) {
376      // We're done with a parsing phase. Replaced nodes are not valid
377      // beyond that point.
378      n->as_SafePoint()->delete_replaced_nodes();
379    }
380    // Use raw traversal of out edges since this code removes out edges
381    int max = n->outcnt();
382    for (int j = 0; j < max; ++j) {
383      Node* child = n->raw_out(j);
384      if (! useful.member(child)) {
385        assert(!child->is_top() || child != top(),
386               "If top is cached in Compile object it is in useful list");
387        // Only need to remove this out-edge to the useless node
388        n->raw_del_out(j);
389        --j;
390        --max;
391      }
392    }
393    if (n->outcnt() == 1 && n->has_special_unique_user()) {
394      record_for_igvn(n->unique_out());
395    }
396  }
397  // Remove useless macro and predicate opaq nodes
398  for (int i = C->macro_count()-1; i >= 0; i--) {
399    Node* n = C->macro_node(i);
400    if (!useful.member(n)) {
401      remove_macro_node(n);
402    }
403  }
404  // Remove useless expensive node
405  for (int i = C->expensive_count()-1; i >= 0; i--) {
406    Node* n = C->expensive_node(i);
407    if (!useful.member(n)) {
408      remove_expensive_node(n);
409    }
410  }
411  // clean up the late inline lists
412  remove_useless_late_inlines(&_string_late_inlines, useful);
413  remove_useless_late_inlines(&_boxing_late_inlines, useful);
414  remove_useless_late_inlines(&_late_inlines, useful);
415  debug_only(verify_graph_edges(true/*check for no_dead_code*/);)
416}
417
418//------------------------------frame_size_in_words-----------------------------
419// frame_slots in units of words
420int Compile::frame_size_in_words() const {
421  // shift is 0 in LP32 and 1 in LP64
422  const int shift = (LogBytesPerWord - LogBytesPerInt);
423  int words = _frame_slots >> shift;
424  assert( words << shift == _frame_slots, "frame size must be properly aligned in LP64" );
425  return words;
426}
427
428// To bang the stack of this compiled method we use the stack size
429// that the interpreter would need in case of a deoptimization. This
430// removes the need to bang the stack in the deoptimization blob which
431// in turn simplifies stack overflow handling.
432int Compile::bang_size_in_bytes() const {
433  return MAX2(_interpreter_frame_size, frame_size_in_bytes());
434}
435
436// ============================================================================
437//------------------------------CompileWrapper---------------------------------
438class CompileWrapper : public StackObj {
439  Compile *const _compile;
440 public:
441  CompileWrapper(Compile* compile);
442
443  ~CompileWrapper();
444};
445
446CompileWrapper::CompileWrapper(Compile* compile) : _compile(compile) {
447  // the Compile* pointer is stored in the current ciEnv:
448  ciEnv* env = compile->env();
449  assert(env == ciEnv::current(), "must already be a ciEnv active");
450  assert(env->compiler_data() == NULL, "compile already active?");
451  env->set_compiler_data(compile);
452  assert(compile == Compile::current(), "sanity");
453
454  compile->set_type_dict(NULL);
455  compile->set_type_hwm(NULL);
456  compile->set_type_last_size(0);
457  compile->set_last_tf(NULL, NULL);
458  compile->set_indexSet_arena(NULL);
459  compile->set_indexSet_free_block_list(NULL);
460  compile->init_type_arena();
461  Type::Initialize(compile);
462  _compile->set_scratch_buffer_blob(NULL);
463  _compile->begin_method();
464}
465CompileWrapper::~CompileWrapper() {
466  _compile->end_method();
467  if (_compile->scratch_buffer_blob() != NULL)
468    BufferBlob::free(_compile->scratch_buffer_blob());
469  _compile->env()->set_compiler_data(NULL);
470}
471
472
473//----------------------------print_compile_messages---------------------------
474void Compile::print_compile_messages() {
475#ifndef PRODUCT
476  // Check if recompiling
477  if (_subsume_loads == false && PrintOpto) {
478    // Recompiling without allowing machine instructions to subsume loads
479    tty->print_cr("*********************************************************");
480    tty->print_cr("** Bailout: Recompile without subsuming loads          **");
481    tty->print_cr("*********************************************************");
482  }
483  if (_do_escape_analysis != DoEscapeAnalysis && PrintOpto) {
484    // Recompiling without escape analysis
485    tty->print_cr("*********************************************************");
486    tty->print_cr("** Bailout: Recompile without escape analysis          **");
487    tty->print_cr("*********************************************************");
488  }
489  if (_eliminate_boxing != EliminateAutoBox && PrintOpto) {
490    // Recompiling without boxing elimination
491    tty->print_cr("*********************************************************");
492    tty->print_cr("** Bailout: Recompile without boxing elimination       **");
493    tty->print_cr("*********************************************************");
494  }
495  if (env()->break_at_compile()) {
496    // Open the debugger when compiling this method.
497    tty->print("### Breaking when compiling: ");
498    method()->print_short_name();
499    tty->cr();
500    BREAKPOINT;
501  }
502
503  if( PrintOpto ) {
504    if (is_osr_compilation()) {
505      tty->print("[OSR]%3d", _compile_id);
506    } else {
507      tty->print("%3d", _compile_id);
508    }
509  }
510#endif
511}
512
513
514//-----------------------init_scratch_buffer_blob------------------------------
515// Construct a temporary BufferBlob and cache it for this compile.
516void Compile::init_scratch_buffer_blob(int const_size) {
517  // If there is already a scratch buffer blob allocated and the
518  // constant section is big enough, use it.  Otherwise free the
519  // current and allocate a new one.
520  BufferBlob* blob = scratch_buffer_blob();
521  if ((blob != NULL) && (const_size <= _scratch_const_size)) {
522    // Use the current blob.
523  } else {
524    if (blob != NULL) {
525      BufferBlob::free(blob);
526    }
527
528    ResourceMark rm;
529    _scratch_const_size = const_size;
530    int size = (MAX_inst_size + MAX_stubs_size + _scratch_const_size);
531    blob = BufferBlob::create("Compile::scratch_buffer", size);
532    // Record the buffer blob for next time.
533    set_scratch_buffer_blob(blob);
534    // Have we run out of code space?
535    if (scratch_buffer_blob() == NULL) {
536      // Let CompilerBroker disable further compilations.
537      record_failure("Not enough space for scratch buffer in CodeCache");
538      CompileBroker::handle_full_code_cache();
539      return;
540    }
541  }
542
543  // Initialize the relocation buffers
544  relocInfo* locs_buf = (relocInfo*) blob->content_end() - MAX_locs_size;
545  set_scratch_locs_memory(locs_buf);
546}
547
548
549//-----------------------scratch_emit_size-------------------------------------
550// Helper function that computes size by emitting code
551uint Compile::scratch_emit_size(const Node* n) {
552  // Start scratch_emit_size section.
553  set_in_scratch_emit_size(true);
554
555  // Emit into a trash buffer and count bytes emitted.
556  // This is a pretty expensive way to compute a size,
557  // but it works well enough if seldom used.
558  // All common fixed-size instructions are given a size
559  // method by the AD file.
560  // Note that the scratch buffer blob and locs memory are
561  // allocated at the beginning of the compile task, and
562  // may be shared by several calls to scratch_emit_size.
563  // The allocation of the scratch buffer blob is particularly
564  // expensive, since it has to grab the code cache lock.
565  BufferBlob* blob = this->scratch_buffer_blob();
566  assert(blob != NULL, "Initialize BufferBlob at start");
567  assert(blob->size() > MAX_inst_size, "sanity");
568  relocInfo* locs_buf = scratch_locs_memory();
569  address blob_begin = blob->content_begin();
570  address blob_end   = (address)locs_buf;
571  assert(blob->content_contains(blob_end), "sanity");
572  CodeBuffer buf(blob_begin, blob_end - blob_begin);
573  buf.initialize_consts_size(_scratch_const_size);
574  buf.initialize_stubs_size(MAX_stubs_size);
575  assert(locs_buf != NULL, "sanity");
576  int lsize = MAX_locs_size / 3;
577  buf.consts()->initialize_shared_locs(&locs_buf[lsize * 0], lsize);
578  buf.insts()->initialize_shared_locs( &locs_buf[lsize * 1], lsize);
579  buf.stubs()->initialize_shared_locs( &locs_buf[lsize * 2], lsize);
580
581  // Do the emission.
582
583  Label fakeL; // Fake label for branch instructions.
584  Label*   saveL = NULL;
585  uint save_bnum = 0;
586  bool is_branch = n->is_MachBranch();
587  if (is_branch) {
588    MacroAssembler masm(&buf);
589    masm.bind(fakeL);
590    n->as_MachBranch()->save_label(&saveL, &save_bnum);
591    n->as_MachBranch()->label_set(&fakeL, 0);
592  }
593  n->emit(buf, this->regalloc());
594  if (is_branch) // Restore label.
595    n->as_MachBranch()->label_set(saveL, save_bnum);
596
597  // End scratch_emit_size section.
598  set_in_scratch_emit_size(false);
599
600  return buf.insts_size();
601}
602
603
604// ============================================================================
605//------------------------------Compile standard-------------------------------
606debug_only( int Compile::_debug_idx = 100000; )
607
608// Compile a method.  entry_bci is -1 for normal compilations and indicates
609// the continuation bci for on stack replacement.
610
611
612Compile::Compile( ciEnv* ci_env, C2Compiler* compiler, ciMethod* target, int osr_bci,
613                  bool subsume_loads, bool do_escape_analysis, bool eliminate_boxing )
614                : Phase(Compiler),
615                  _env(ci_env),
616                  _log(ci_env->log()),
617                  _compile_id(ci_env->compile_id()),
618                  _save_argument_registers(false),
619                  _stub_name(NULL),
620                  _stub_function(NULL),
621                  _stub_entry_point(NULL),
622                  _method(target),
623                  _entry_bci(osr_bci),
624                  _initial_gvn(NULL),
625                  _for_igvn(NULL),
626                  _warm_calls(NULL),
627                  _subsume_loads(subsume_loads),
628                  _do_escape_analysis(do_escape_analysis),
629                  _eliminate_boxing(eliminate_boxing),
630                  _failure_reason(NULL),
631                  _code_buffer("Compile::Fill_buffer"),
632                  _orig_pc_slot(0),
633                  _orig_pc_slot_offset_in_bytes(0),
634                  _has_method_handle_invokes(false),
635                  _mach_constant_base_node(NULL),
636                  _node_bundling_limit(0),
637                  _node_bundling_base(NULL),
638                  _java_calls(0),
639                  _inner_loops(0),
640                  _scratch_const_size(-1),
641                  _in_scratch_emit_size(false),
642                  _dead_node_list(comp_arena()),
643                  _dead_node_count(0),
644#ifndef PRODUCT
645                  _trace_opto_output(TraceOptoOutput || method()->has_option("TraceOptoOutput")),
646                  _in_dump_cnt(0),
647                  _printer(IdealGraphPrinter::printer()),
648#endif
649                  _congraph(NULL),
650                  _replay_inline_data(NULL),
651                  _late_inlines(comp_arena(), 2, 0, NULL),
652                  _string_late_inlines(comp_arena(), 2, 0, NULL),
653                  _boxing_late_inlines(comp_arena(), 2, 0, NULL),
654                  _late_inlines_pos(0),
655                  _number_of_mh_late_inlines(0),
656                  _inlining_progress(false),
657                  _inlining_incrementally(false),
658                  _print_inlining_list(NULL),
659                  _print_inlining_stream(NULL),
660                  _print_inlining_idx(0),
661                  _print_inlining_output(NULL),
662                  _interpreter_frame_size(0) {
663  C = this;
664
665  CompileWrapper cw(this);
666#ifndef PRODUCT
667  if (TimeCompiler2) {
668    tty->print(" ");
669    target->holder()->name()->print();
670    tty->print(".");
671    target->print_short_name();
672    tty->print("  ");
673  }
674  TraceTime t1("Total compilation time", &_t_totalCompilation, TimeCompiler, TimeCompiler2);
675  TraceTime t2(NULL, &_t_methodCompilation, TimeCompiler, false);
676  bool print_opto_assembly = PrintOptoAssembly || _method->has_option("PrintOptoAssembly");
677  if (!print_opto_assembly) {
678    bool print_assembly = (PrintAssembly || _method->should_print_assembly());
679    if (print_assembly && !Disassembler::can_decode()) {
680      tty->print_cr("PrintAssembly request changed to PrintOptoAssembly");
681      print_opto_assembly = true;
682    }
683  }
684  set_print_assembly(print_opto_assembly);
685  set_parsed_irreducible_loop(false);
686
687  if (method()->has_option("ReplayInline")) {
688    _replay_inline_data = ciReplay::load_inline_data(method(), entry_bci(), ci_env->comp_level());
689  }
690#endif
691  set_print_inlining(PrintInlining || method()->has_option("PrintInlining") NOT_PRODUCT( || PrintOptoInlining));
692  set_print_intrinsics(PrintIntrinsics || method()->has_option("PrintIntrinsics"));
693  set_has_irreducible_loop(true); // conservative until build_loop_tree() reset it
694
695  if (ProfileTraps RTM_OPT_ONLY( || UseRTMLocking )) {
696    // Make sure the method being compiled gets its own MDO,
697    // so we can at least track the decompile_count().
698    // Need MDO to record RTM code generation state.
699    method()->ensure_method_data();
700  }
701
702  Init(::AliasLevel);
703
704
705  print_compile_messages();
706
707  _ilt = InlineTree::build_inline_tree_root();
708
709  // Even if NO memory addresses are used, MergeMem nodes must have at least 1 slice
710  assert(num_alias_types() >= AliasIdxRaw, "");
711
712#define MINIMUM_NODE_HASH  1023
713  // Node list that Iterative GVN will start with
714  Unique_Node_List for_igvn(comp_arena());
715  set_for_igvn(&for_igvn);
716
717  // GVN that will be run immediately on new nodes
718  uint estimated_size = method()->code_size()*4+64;
719  estimated_size = (estimated_size < MINIMUM_NODE_HASH ? MINIMUM_NODE_HASH : estimated_size);
720  PhaseGVN gvn(node_arena(), estimated_size);
721  set_initial_gvn(&gvn);
722
723  print_inlining_init();
724  { // Scope for timing the parser
725    TracePhase t3("parse", &_t_parser, true);
726
727    // Put top into the hash table ASAP.
728    initial_gvn()->transform_no_reclaim(top());
729
730    // Set up tf(), start(), and find a CallGenerator.
731    CallGenerator* cg = NULL;
732    if (is_osr_compilation()) {
733      const TypeTuple *domain = StartOSRNode::osr_domain();
734      const TypeTuple *range = TypeTuple::make_range(method()->signature());
735      init_tf(TypeFunc::make(domain, range));
736      StartNode* s = new StartOSRNode(root(), domain);
737      initial_gvn()->set_type_bottom(s);
738      init_start(s);
739      cg = CallGenerator::for_osr(method(), entry_bci());
740    } else {
741      // Normal case.
742      init_tf(TypeFunc::make(method()));
743      StartNode* s = new StartNode(root(), tf()->domain());
744      initial_gvn()->set_type_bottom(s);
745      init_start(s);
746      if (method()->intrinsic_id() == vmIntrinsics::_Reference_get && UseG1GC) {
747        // With java.lang.ref.reference.get() we must go through the
748        // intrinsic when G1 is enabled - even when get() is the root
749        // method of the compile - so that, if necessary, the value in
750        // the referent field of the reference object gets recorded by
751        // the pre-barrier code.
752        // Specifically, if G1 is enabled, the value in the referent
753        // field is recorded by the G1 SATB pre barrier. This will
754        // result in the referent being marked live and the reference
755        // object removed from the list of discovered references during
756        // reference processing.
757        cg = find_intrinsic(method(), false);
758      }
759      if (cg == NULL) {
760        float past_uses = method()->interpreter_invocation_count();
761        float expected_uses = past_uses;
762        cg = CallGenerator::for_inline(method(), expected_uses);
763      }
764    }
765    if (failing())  return;
766    if (cg == NULL) {
767      record_method_not_compilable_all_tiers("cannot parse method");
768      return;
769    }
770    JVMState* jvms = build_start_state(start(), tf());
771    if ((jvms = cg->generate(jvms)) == NULL) {
772      record_method_not_compilable("method parse failed");
773      return;
774    }
775    GraphKit kit(jvms);
776
777    if (!kit.stopped()) {
778      // Accept return values, and transfer control we know not where.
779      // This is done by a special, unique ReturnNode bound to root.
780      return_values(kit.jvms());
781    }
782
783    if (kit.has_exceptions()) {
784      // Any exceptions that escape from this call must be rethrown
785      // to whatever caller is dynamically above us on the stack.
786      // This is done by a special, unique RethrowNode bound to root.
787      rethrow_exceptions(kit.transfer_exceptions_into_jvms());
788    }
789
790    assert(IncrementalInline || (_late_inlines.length() == 0 && !has_mh_late_inlines()), "incremental inlining is off");
791
792    if (_late_inlines.length() == 0 && !has_mh_late_inlines() && !failing() && has_stringbuilder()) {
793      inline_string_calls(true);
794    }
795
796    if (failing())  return;
797
798    print_method(PHASE_BEFORE_REMOVEUSELESS, 3);
799
800    // Remove clutter produced by parsing.
801    if (!failing()) {
802      ResourceMark rm;
803      PhaseRemoveUseless pru(initial_gvn(), &for_igvn);
804    }
805  }
806
807  // Note:  Large methods are capped off in do_one_bytecode().
808  if (failing())  return;
809
810  // After parsing, node notes are no longer automagic.
811  // They must be propagated by register_new_node_with_optimizer(),
812  // clone(), or the like.
813  set_default_node_notes(NULL);
814
815  for (;;) {
816    int successes = Inline_Warm();
817    if (failing())  return;
818    if (successes == 0)  break;
819  }
820
821  // Drain the list.
822  Finish_Warm();
823#ifndef PRODUCT
824  if (_printer) {
825    _printer->print_inlining(this);
826  }
827#endif
828
829  if (failing())  return;
830  NOT_PRODUCT( verify_graph_edges(); )
831
832  // Now optimize
833  Optimize();
834  if (failing())  return;
835  NOT_PRODUCT( verify_graph_edges(); )
836
837#ifndef PRODUCT
838  if (PrintIdeal) {
839    ttyLocker ttyl;  // keep the following output all in one block
840    // This output goes directly to the tty, not the compiler log.
841    // To enable tools to match it up with the compilation activity,
842    // be sure to tag this tty output with the compile ID.
843    if (xtty != NULL) {
844      xtty->head("ideal compile_id='%d'%s", compile_id(),
845                 is_osr_compilation()    ? " compile_kind='osr'" :
846                 "");
847    }
848    root()->dump(9999);
849    if (xtty != NULL) {
850      xtty->tail("ideal");
851    }
852  }
853#endif
854
855  NOT_PRODUCT( verify_barriers(); )
856
857  // Dump compilation data to replay it.
858  if (method()->has_option("DumpReplay")) {
859    env()->dump_replay_data(_compile_id);
860  }
861  if (method()->has_option("DumpInline") && (ilt() != NULL)) {
862    env()->dump_inline_data(_compile_id);
863  }
864
865  // Now that we know the size of all the monitors we can add a fixed slot
866  // for the original deopt pc.
867
868  _orig_pc_slot =  fixed_slots();
869  int next_slot = _orig_pc_slot + (sizeof(address) / VMRegImpl::stack_slot_size);
870  set_fixed_slots(next_slot);
871
872  // Compute when to use implicit null checks. Used by matching trap based
873  // nodes and NullCheck optimization.
874  set_allowed_deopt_reasons();
875
876  // Now generate code
877  Code_Gen();
878  if (failing())  return;
879
880  // Check if we want to skip execution of all compiled code.
881  {
882#ifndef PRODUCT
883    if (OptoNoExecute) {
884      record_method_not_compilable("+OptoNoExecute");  // Flag as failed
885      return;
886    }
887    TracePhase t2("install_code", &_t_registerMethod, TimeCompiler);
888#endif
889
890    if (is_osr_compilation()) {
891      _code_offsets.set_value(CodeOffsets::Verified_Entry, 0);
892      _code_offsets.set_value(CodeOffsets::OSR_Entry, _first_block_size);
893    } else {
894      _code_offsets.set_value(CodeOffsets::Verified_Entry, _first_block_size);
895      _code_offsets.set_value(CodeOffsets::OSR_Entry, 0);
896    }
897
898    env()->register_method(_method, _entry_bci,
899                           &_code_offsets,
900                           _orig_pc_slot_offset_in_bytes,
901                           code_buffer(),
902                           frame_size_in_words(), _oop_map_set,
903                           &_handler_table, &_inc_table,
904                           compiler,
905                           env()->comp_level(),
906                           has_unsafe_access(),
907                           SharedRuntime::is_wide_vector(max_vector_size()),
908                           rtm_state()
909                           );
910
911    if (log() != NULL) // Print code cache state into compiler log
912      log()->code_cache_state();
913  }
914}
915
916//------------------------------Compile----------------------------------------
917// Compile a runtime stub
918Compile::Compile( ciEnv* ci_env,
919                  TypeFunc_generator generator,
920                  address stub_function,
921                  const char *stub_name,
922                  int is_fancy_jump,
923                  bool pass_tls,
924                  bool save_arg_registers,
925                  bool return_pc )
926  : Phase(Compiler),
927    _env(ci_env),
928    _log(ci_env->log()),
929    _compile_id(0),
930    _save_argument_registers(save_arg_registers),
931    _method(NULL),
932    _stub_name(stub_name),
933    _stub_function(stub_function),
934    _stub_entry_point(NULL),
935    _entry_bci(InvocationEntryBci),
936    _initial_gvn(NULL),
937    _for_igvn(NULL),
938    _warm_calls(NULL),
939    _orig_pc_slot(0),
940    _orig_pc_slot_offset_in_bytes(0),
941    _subsume_loads(true),
942    _do_escape_analysis(false),
943    _eliminate_boxing(false),
944    _failure_reason(NULL),
945    _code_buffer("Compile::Fill_buffer"),
946    _has_method_handle_invokes(false),
947    _mach_constant_base_node(NULL),
948    _node_bundling_limit(0),
949    _node_bundling_base(NULL),
950    _java_calls(0),
951    _inner_loops(0),
952#ifndef PRODUCT
953    _trace_opto_output(TraceOptoOutput),
954    _in_dump_cnt(0),
955    _printer(NULL),
956#endif
957    _dead_node_list(comp_arena()),
958    _dead_node_count(0),
959    _congraph(NULL),
960    _replay_inline_data(NULL),
961    _number_of_mh_late_inlines(0),
962    _inlining_progress(false),
963    _inlining_incrementally(false),
964    _print_inlining_list(NULL),
965    _print_inlining_stream(NULL),
966    _print_inlining_idx(0),
967    _print_inlining_output(NULL),
968    _allowed_reasons(0),
969    _interpreter_frame_size(0) {
970  C = this;
971
972#ifndef PRODUCT
973  TraceTime t1(NULL, &_t_totalCompilation, TimeCompiler, false);
974  TraceTime t2(NULL, &_t_stubCompilation, TimeCompiler, false);
975  set_print_assembly(PrintFrameConverterAssembly);
976  set_parsed_irreducible_loop(false);
977#endif
978  set_has_irreducible_loop(false); // no loops
979
980  CompileWrapper cw(this);
981  Init(/*AliasLevel=*/ 0);
982  init_tf((*generator)());
983
984  {
985    // The following is a dummy for the sake of GraphKit::gen_stub
986    Unique_Node_List for_igvn(comp_arena());
987    set_for_igvn(&for_igvn);  // not used, but some GraphKit guys push on this
988    PhaseGVN gvn(Thread::current()->resource_area(),255);
989    set_initial_gvn(&gvn);    // not significant, but GraphKit guys use it pervasively
990    gvn.transform_no_reclaim(top());
991
992    GraphKit kit;
993    kit.gen_stub(stub_function, stub_name, is_fancy_jump, pass_tls, return_pc);
994  }
995
996  NOT_PRODUCT( verify_graph_edges(); )
997  Code_Gen();
998  if (failing())  return;
999
1000
1001  // Entry point will be accessed using compile->stub_entry_point();
1002  if (code_buffer() == NULL) {
1003    Matcher::soft_match_failure();
1004  } else {
1005    if (PrintAssembly && (WizardMode || Verbose))
1006      tty->print_cr("### Stub::%s", stub_name);
1007
1008    if (!failing()) {
1009      assert(_fixed_slots == 0, "no fixed slots used for runtime stubs");
1010
1011      // Make the NMethod
1012      // For now we mark the frame as never safe for profile stackwalking
1013      RuntimeStub *rs = RuntimeStub::new_runtime_stub(stub_name,
1014                                                      code_buffer(),
1015                                                      CodeOffsets::frame_never_safe,
1016                                                      // _code_offsets.value(CodeOffsets::Frame_Complete),
1017                                                      frame_size_in_words(),
1018                                                      _oop_map_set,
1019                                                      save_arg_registers);
1020      assert(rs != NULL && rs->is_runtime_stub(), "sanity check");
1021
1022      _stub_entry_point = rs->entry_point();
1023    }
1024  }
1025}
1026
1027//------------------------------Init-------------------------------------------
1028// Prepare for a single compilation
1029void Compile::Init(int aliaslevel) {
1030  _unique  = 0;
1031  _regalloc = NULL;
1032
1033  _tf      = NULL;  // filled in later
1034  _top     = NULL;  // cached later
1035  _matcher = NULL;  // filled in later
1036  _cfg     = NULL;  // filled in later
1037
1038  set_24_bit_selection_and_mode(Use24BitFP, false);
1039
1040  _node_note_array = NULL;
1041  _default_node_notes = NULL;
1042
1043  _immutable_memory = NULL; // filled in at first inquiry
1044
1045  // Globally visible Nodes
1046  // First set TOP to NULL to give safe behavior during creation of RootNode
1047  set_cached_top_node(NULL);
1048  set_root(new RootNode());
1049  // Now that you have a Root to point to, create the real TOP
1050  set_cached_top_node( new ConNode(Type::TOP) );
1051  set_recent_alloc(NULL, NULL);
1052
1053  // Create Debug Information Recorder to record scopes, oopmaps, etc.
1054  env()->set_oop_recorder(new OopRecorder(env()->arena()));
1055  env()->set_debug_info(new DebugInformationRecorder(env()->oop_recorder()));
1056  env()->set_dependencies(new Dependencies(env()));
1057
1058  _fixed_slots = 0;
1059  set_has_split_ifs(false);
1060  set_has_loops(has_method() && method()->has_loops()); // first approximation
1061  set_has_stringbuilder(false);
1062  set_has_boxed_value(false);
1063  _trap_can_recompile = false;  // no traps emitted yet
1064  _major_progress = true; // start out assuming good things will happen
1065  set_has_unsafe_access(false);
1066  set_max_vector_size(0);
1067  Copy::zero_to_bytes(_trap_hist, sizeof(_trap_hist));
1068  set_decompile_count(0);
1069
1070  set_do_freq_based_layout(BlockLayoutByFrequency || method_has_option("BlockLayoutByFrequency"));
1071  set_num_loop_opts(LoopOptsCount);
1072  set_do_inlining(Inline);
1073  set_max_inline_size(MaxInlineSize);
1074  set_freq_inline_size(FreqInlineSize);
1075  set_do_scheduling(OptoScheduling);
1076  set_do_count_invocations(false);
1077  set_do_method_data_update(false);
1078  set_age_code(has_method() && method()->profile_aging());
1079  set_rtm_state(NoRTM); // No RTM lock eliding by default
1080#if INCLUDE_RTM_OPT
1081  if (UseRTMLocking && has_method() && (method()->method_data_or_null() != NULL)) {
1082    int rtm_state = method()->method_data()->rtm_state();
1083    if (method_has_option("NoRTMLockEliding") || ((rtm_state & NoRTM) != 0)) {
1084      // Don't generate RTM lock eliding code.
1085      set_rtm_state(NoRTM);
1086    } else if (method_has_option("UseRTMLockEliding") || ((rtm_state & UseRTM) != 0) || !UseRTMDeopt) {
1087      // Generate RTM lock eliding code without abort ratio calculation code.
1088      set_rtm_state(UseRTM);
1089    } else if (UseRTMDeopt) {
1090      // Generate RTM lock eliding code and include abort ratio calculation
1091      // code if UseRTMDeopt is on.
1092      set_rtm_state(ProfileRTM);
1093    }
1094  }
1095#endif
1096  if (debug_info()->recording_non_safepoints()) {
1097    set_node_note_array(new(comp_arena()) GrowableArray<Node_Notes*>
1098                        (comp_arena(), 8, 0, NULL));
1099    set_default_node_notes(Node_Notes::make(this));
1100  }
1101
1102  // // -- Initialize types before each compile --
1103  // // Update cached type information
1104  // if( _method && _method->constants() )
1105  //   Type::update_loaded_types(_method, _method->constants());
1106
1107  // Init alias_type map.
1108  if (!_do_escape_analysis && aliaslevel == 3)
1109    aliaslevel = 2;  // No unique types without escape analysis
1110  _AliasLevel = aliaslevel;
1111  const int grow_ats = 16;
1112  _max_alias_types = grow_ats;
1113  _alias_types   = NEW_ARENA_ARRAY(comp_arena(), AliasType*, grow_ats);
1114  AliasType* ats = NEW_ARENA_ARRAY(comp_arena(), AliasType,  grow_ats);
1115  Copy::zero_to_bytes(ats, sizeof(AliasType)*grow_ats);
1116  {
1117    for (int i = 0; i < grow_ats; i++)  _alias_types[i] = &ats[i];
1118  }
1119  // Initialize the first few types.
1120  _alias_types[AliasIdxTop]->Init(AliasIdxTop, NULL);
1121  _alias_types[AliasIdxBot]->Init(AliasIdxBot, TypePtr::BOTTOM);
1122  _alias_types[AliasIdxRaw]->Init(AliasIdxRaw, TypeRawPtr::BOTTOM);
1123  _num_alias_types = AliasIdxRaw+1;
1124  // Zero out the alias type cache.
1125  Copy::zero_to_bytes(_alias_cache, sizeof(_alias_cache));
1126  // A NULL adr_type hits in the cache right away.  Preload the right answer.
1127  probe_alias_cache(NULL)->_index = AliasIdxTop;
1128
1129  _intrinsics = NULL;
1130  _macro_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1131  _predicate_opaqs = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1132  _expensive_nodes = new(comp_arena()) GrowableArray<Node*>(comp_arena(), 8,  0, NULL);
1133  register_library_intrinsics();
1134}
1135
1136//---------------------------init_start----------------------------------------
1137// Install the StartNode on this compile object.
1138void Compile::init_start(StartNode* s) {
1139  if (failing())
1140    return; // already failing
1141  assert(s == start(), "");
1142}
1143
1144StartNode* Compile::start() const {
1145  assert(!failing(), "");
1146  for (DUIterator_Fast imax, i = root()->fast_outs(imax); i < imax; i++) {
1147    Node* start = root()->fast_out(i);
1148    if( start->is_Start() )
1149      return start->as_Start();
1150  }
1151  fatal("Did not find Start node!");
1152  return NULL;
1153}
1154
1155//-------------------------------immutable_memory-------------------------------------
1156// Access immutable memory
1157Node* Compile::immutable_memory() {
1158  if (_immutable_memory != NULL) {
1159    return _immutable_memory;
1160  }
1161  StartNode* s = start();
1162  for (DUIterator_Fast imax, i = s->fast_outs(imax); true; i++) {
1163    Node *p = s->fast_out(i);
1164    if (p != s && p->as_Proj()->_con == TypeFunc::Memory) {
1165      _immutable_memory = p;
1166      return _immutable_memory;
1167    }
1168  }
1169  ShouldNotReachHere();
1170  return NULL;
1171}
1172
1173//----------------------set_cached_top_node------------------------------------
1174// Install the cached top node, and make sure Node::is_top works correctly.
1175void Compile::set_cached_top_node(Node* tn) {
1176  if (tn != NULL)  verify_top(tn);
1177  Node* old_top = _top;
1178  _top = tn;
1179  // Calling Node::setup_is_top allows the nodes the chance to adjust
1180  // their _out arrays.
1181  if (_top != NULL)     _top->setup_is_top();
1182  if (old_top != NULL)  old_top->setup_is_top();
1183  assert(_top == NULL || top()->is_top(), "");
1184}
1185
1186#ifdef ASSERT
1187uint Compile::count_live_nodes_by_graph_walk() {
1188  Unique_Node_List useful(comp_arena());
1189  // Get useful node list by walking the graph.
1190  identify_useful_nodes(useful);
1191  return useful.size();
1192}
1193
1194void Compile::print_missing_nodes() {
1195
1196  // Return if CompileLog is NULL and PrintIdealNodeCount is false.
1197  if ((_log == NULL) && (! PrintIdealNodeCount)) {
1198    return;
1199  }
1200
1201  // This is an expensive function. It is executed only when the user
1202  // specifies VerifyIdealNodeCount option or otherwise knows the
1203  // additional work that needs to be done to identify reachable nodes
1204  // by walking the flow graph and find the missing ones using
1205  // _dead_node_list.
1206
1207  Unique_Node_List useful(comp_arena());
1208  // Get useful node list by walking the graph.
1209  identify_useful_nodes(useful);
1210
1211  uint l_nodes = C->live_nodes();
1212  uint l_nodes_by_walk = useful.size();
1213
1214  if (l_nodes != l_nodes_by_walk) {
1215    if (_log != NULL) {
1216      _log->begin_head("mismatched_nodes count='%d'", abs((int) (l_nodes - l_nodes_by_walk)));
1217      _log->stamp();
1218      _log->end_head();
1219    }
1220    VectorSet& useful_member_set = useful.member_set();
1221    int last_idx = l_nodes_by_walk;
1222    for (int i = 0; i < last_idx; i++) {
1223      if (useful_member_set.test(i)) {
1224        if (_dead_node_list.test(i)) {
1225          if (_log != NULL) {
1226            _log->elem("mismatched_node_info node_idx='%d' type='both live and dead'", i);
1227          }
1228          if (PrintIdealNodeCount) {
1229            // Print the log message to tty
1230              tty->print_cr("mismatched_node idx='%d' both live and dead'", i);
1231              useful.at(i)->dump();
1232          }
1233        }
1234      }
1235      else if (! _dead_node_list.test(i)) {
1236        if (_log != NULL) {
1237          _log->elem("mismatched_node_info node_idx='%d' type='neither live nor dead'", i);
1238        }
1239        if (PrintIdealNodeCount) {
1240          // Print the log message to tty
1241          tty->print_cr("mismatched_node idx='%d' type='neither live nor dead'", i);
1242        }
1243      }
1244    }
1245    if (_log != NULL) {
1246      _log->tail("mismatched_nodes");
1247    }
1248  }
1249}
1250#endif
1251
1252#ifndef PRODUCT
1253void Compile::verify_top(Node* tn) const {
1254  if (tn != NULL) {
1255    assert(tn->is_Con(), "top node must be a constant");
1256    assert(((ConNode*)tn)->type() == Type::TOP, "top node must have correct type");
1257    assert(tn->in(0) != NULL, "must have live top node");
1258  }
1259}
1260#endif
1261
1262
1263///-------------------Managing Per-Node Debug & Profile Info-------------------
1264
1265void Compile::grow_node_notes(GrowableArray<Node_Notes*>* arr, int grow_by) {
1266  guarantee(arr != NULL, "");
1267  int num_blocks = arr->length();
1268  if (grow_by < num_blocks)  grow_by = num_blocks;
1269  int num_notes = grow_by * _node_notes_block_size;
1270  Node_Notes* notes = NEW_ARENA_ARRAY(node_arena(), Node_Notes, num_notes);
1271  Copy::zero_to_bytes(notes, num_notes * sizeof(Node_Notes));
1272  while (num_notes > 0) {
1273    arr->append(notes);
1274    notes     += _node_notes_block_size;
1275    num_notes -= _node_notes_block_size;
1276  }
1277  assert(num_notes == 0, "exact multiple, please");
1278}
1279
1280bool Compile::copy_node_notes_to(Node* dest, Node* source) {
1281  if (source == NULL || dest == NULL)  return false;
1282
1283  if (dest->is_Con())
1284    return false;               // Do not push debug info onto constants.
1285
1286#ifdef ASSERT
1287  // Leave a bread crumb trail pointing to the original node:
1288  if (dest != NULL && dest != source && dest->debug_orig() == NULL) {
1289    dest->set_debug_orig(source);
1290  }
1291#endif
1292
1293  if (node_note_array() == NULL)
1294    return false;               // Not collecting any notes now.
1295
1296  // This is a copy onto a pre-existing node, which may already have notes.
1297  // If both nodes have notes, do not overwrite any pre-existing notes.
1298  Node_Notes* source_notes = node_notes_at(source->_idx);
1299  if (source_notes == NULL || source_notes->is_clear())  return false;
1300  Node_Notes* dest_notes   = node_notes_at(dest->_idx);
1301  if (dest_notes == NULL || dest_notes->is_clear()) {
1302    return set_node_notes_at(dest->_idx, source_notes);
1303  }
1304
1305  Node_Notes merged_notes = (*source_notes);
1306  // The order of operations here ensures that dest notes will win...
1307  merged_notes.update_from(dest_notes);
1308  return set_node_notes_at(dest->_idx, &merged_notes);
1309}
1310
1311
1312//--------------------------allow_range_check_smearing-------------------------
1313// Gating condition for coalescing similar range checks.
1314// Sometimes we try 'speculatively' replacing a series of a range checks by a
1315// single covering check that is at least as strong as any of them.
1316// If the optimization succeeds, the simplified (strengthened) range check
1317// will always succeed.  If it fails, we will deopt, and then give up
1318// on the optimization.
1319bool Compile::allow_range_check_smearing() const {
1320  // If this method has already thrown a range-check,
1321  // assume it was because we already tried range smearing
1322  // and it failed.
1323  uint already_trapped = trap_count(Deoptimization::Reason_range_check);
1324  return !already_trapped;
1325}
1326
1327
1328//------------------------------flatten_alias_type-----------------------------
1329const TypePtr *Compile::flatten_alias_type( const TypePtr *tj ) const {
1330  int offset = tj->offset();
1331  TypePtr::PTR ptr = tj->ptr();
1332
1333  // Known instance (scalarizable allocation) alias only with itself.
1334  bool is_known_inst = tj->isa_oopptr() != NULL &&
1335                       tj->is_oopptr()->is_known_instance();
1336
1337  // Process weird unsafe references.
1338  if (offset == Type::OffsetBot && (tj->isa_instptr() /*|| tj->isa_klassptr()*/)) {
1339    assert(InlineUnsafeOps, "indeterminate pointers come only from unsafe ops");
1340    assert(!is_known_inst, "scalarizable allocation should not have unsafe references");
1341    tj = TypeOopPtr::BOTTOM;
1342    ptr = tj->ptr();
1343    offset = tj->offset();
1344  }
1345
1346  // Array pointers need some flattening
1347  const TypeAryPtr *ta = tj->isa_aryptr();
1348  if (ta && ta->is_stable()) {
1349    // Erase stability property for alias analysis.
1350    tj = ta = ta->cast_to_stable(false);
1351  }
1352  if( ta && is_known_inst ) {
1353    if ( offset != Type::OffsetBot &&
1354         offset > arrayOopDesc::length_offset_in_bytes() ) {
1355      offset = Type::OffsetBot; // Flatten constant access into array body only
1356      tj = ta = TypeAryPtr::make(ptr, ta->ary(), ta->klass(), true, offset, ta->instance_id());
1357    }
1358  } else if( ta && _AliasLevel >= 2 ) {
1359    // For arrays indexed by constant indices, we flatten the alias
1360    // space to include all of the array body.  Only the header, klass
1361    // and array length can be accessed un-aliased.
1362    if( offset != Type::OffsetBot ) {
1363      if( ta->const_oop() ) { // MethodData* or Method*
1364        offset = Type::OffsetBot;   // Flatten constant access into array body
1365        tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),ta->ary(),ta->klass(),false,offset);
1366      } else if( offset == arrayOopDesc::length_offset_in_bytes() ) {
1367        // range is OK as-is.
1368        tj = ta = TypeAryPtr::RANGE;
1369      } else if( offset == oopDesc::klass_offset_in_bytes() ) {
1370        tj = TypeInstPtr::KLASS; // all klass loads look alike
1371        ta = TypeAryPtr::RANGE; // generic ignored junk
1372        ptr = TypePtr::BotPTR;
1373      } else if( offset == oopDesc::mark_offset_in_bytes() ) {
1374        tj = TypeInstPtr::MARK;
1375        ta = TypeAryPtr::RANGE; // generic ignored junk
1376        ptr = TypePtr::BotPTR;
1377      } else {                  // Random constant offset into array body
1378        offset = Type::OffsetBot;   // Flatten constant access into array body
1379        tj = ta = TypeAryPtr::make(ptr,ta->ary(),ta->klass(),false,offset);
1380      }
1381    }
1382    // Arrays of fixed size alias with arrays of unknown size.
1383    if (ta->size() != TypeInt::POS) {
1384      const TypeAry *tary = TypeAry::make(ta->elem(), TypeInt::POS);
1385      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,ta->klass(),false,offset);
1386    }
1387    // Arrays of known objects become arrays of unknown objects.
1388    if (ta->elem()->isa_narrowoop() && ta->elem() != TypeNarrowOop::BOTTOM) {
1389      const TypeAry *tary = TypeAry::make(TypeNarrowOop::BOTTOM, ta->size());
1390      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1391    }
1392    if (ta->elem()->isa_oopptr() && ta->elem() != TypeInstPtr::BOTTOM) {
1393      const TypeAry *tary = TypeAry::make(TypeInstPtr::BOTTOM, ta->size());
1394      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,NULL,false,offset);
1395    }
1396    // Arrays of bytes and of booleans both use 'bastore' and 'baload' so
1397    // cannot be distinguished by bytecode alone.
1398    if (ta->elem() == TypeInt::BOOL) {
1399      const TypeAry *tary = TypeAry::make(TypeInt::BYTE, ta->size());
1400      ciKlass* aklass = ciTypeArrayKlass::make(T_BYTE);
1401      tj = ta = TypeAryPtr::make(ptr,ta->const_oop(),tary,aklass,false,offset);
1402    }
1403    // During the 2nd round of IterGVN, NotNull castings are removed.
1404    // Make sure the Bottom and NotNull variants alias the same.
1405    // Also, make sure exact and non-exact variants alias the same.
1406    if (ptr == TypePtr::NotNull || ta->klass_is_exact() || ta->speculative() != NULL) {
1407      tj = ta = TypeAryPtr::make(TypePtr::BotPTR,ta->ary(),ta->klass(),false,offset);
1408    }
1409  }
1410
1411  // Oop pointers need some flattening
1412  const TypeInstPtr *to = tj->isa_instptr();
1413  if( to && _AliasLevel >= 2 && to != TypeOopPtr::BOTTOM ) {
1414    ciInstanceKlass *k = to->klass()->as_instance_klass();
1415    if( ptr == TypePtr::Constant ) {
1416      if (to->klass() != ciEnv::current()->Class_klass() ||
1417          offset < k->size_helper() * wordSize) {
1418        // No constant oop pointers (such as Strings); they alias with
1419        // unknown strings.
1420        assert(!is_known_inst, "not scalarizable allocation");
1421        tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1422      }
1423    } else if( is_known_inst ) {
1424      tj = to; // Keep NotNull and klass_is_exact for instance type
1425    } else if( ptr == TypePtr::NotNull || to->klass_is_exact() ) {
1426      // During the 2nd round of IterGVN, NotNull castings are removed.
1427      // Make sure the Bottom and NotNull variants alias the same.
1428      // Also, make sure exact and non-exact variants alias the same.
1429      tj = to = TypeInstPtr::make(TypePtr::BotPTR,to->klass(),false,0,offset);
1430    }
1431    if (to->speculative() != NULL) {
1432      tj = to = TypeInstPtr::make(to->ptr(),to->klass(),to->klass_is_exact(),to->const_oop(),to->offset(), to->instance_id());
1433    }
1434    // Canonicalize the holder of this field
1435    if (offset >= 0 && offset < instanceOopDesc::base_offset_in_bytes()) {
1436      // First handle header references such as a LoadKlassNode, even if the
1437      // object's klass is unloaded at compile time (4965979).
1438      if (!is_known_inst) { // Do it only for non-instance types
1439        tj = to = TypeInstPtr::make(TypePtr::BotPTR, env()->Object_klass(), false, NULL, offset);
1440      }
1441    } else if (offset < 0 || offset >= k->size_helper() * wordSize) {
1442      // Static fields are in the space above the normal instance
1443      // fields in the java.lang.Class instance.
1444      if (to->klass() != ciEnv::current()->Class_klass()) {
1445        to = NULL;
1446        tj = TypeOopPtr::BOTTOM;
1447        offset = tj->offset();
1448      }
1449    } else {
1450      ciInstanceKlass *canonical_holder = k->get_canonical_holder(offset);
1451      if (!k->equals(canonical_holder) || tj->offset() != offset) {
1452        if( is_known_inst ) {
1453          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, true, NULL, offset, to->instance_id());
1454        } else {
1455          tj = to = TypeInstPtr::make(to->ptr(), canonical_holder, false, NULL, offset);
1456        }
1457      }
1458    }
1459  }
1460
1461  // Klass pointers to object array klasses need some flattening
1462  const TypeKlassPtr *tk = tj->isa_klassptr();
1463  if( tk ) {
1464    // If we are referencing a field within a Klass, we need
1465    // to assume the worst case of an Object.  Both exact and
1466    // inexact types must flatten to the same alias class so
1467    // use NotNull as the PTR.
1468    if ( offset == Type::OffsetBot || (offset >= 0 && (size_t)offset < sizeof(Klass)) ) {
1469
1470      tj = tk = TypeKlassPtr::make(TypePtr::NotNull,
1471                                   TypeKlassPtr::OBJECT->klass(),
1472                                   offset);
1473    }
1474
1475    ciKlass* klass = tk->klass();
1476    if( klass->is_obj_array_klass() ) {
1477      ciKlass* k = TypeAryPtr::OOPS->klass();
1478      if( !k || !k->is_loaded() )                  // Only fails for some -Xcomp runs
1479        k = TypeInstPtr::BOTTOM->klass();
1480      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, k, offset );
1481    }
1482
1483    // Check for precise loads from the primary supertype array and force them
1484    // to the supertype cache alias index.  Check for generic array loads from
1485    // the primary supertype array and also force them to the supertype cache
1486    // alias index.  Since the same load can reach both, we need to merge
1487    // these 2 disparate memories into the same alias class.  Since the
1488    // primary supertype array is read-only, there's no chance of confusion
1489    // where we bypass an array load and an array store.
1490    int primary_supers_offset = in_bytes(Klass::primary_supers_offset());
1491    if (offset == Type::OffsetBot ||
1492        (offset >= primary_supers_offset &&
1493         offset < (int)(primary_supers_offset + Klass::primary_super_limit() * wordSize)) ||
1494        offset == (int)in_bytes(Klass::secondary_super_cache_offset())) {
1495      offset = in_bytes(Klass::secondary_super_cache_offset());
1496      tj = tk = TypeKlassPtr::make( TypePtr::NotNull, tk->klass(), offset );
1497    }
1498  }
1499
1500  // Flatten all Raw pointers together.
1501  if (tj->base() == Type::RawPtr)
1502    tj = TypeRawPtr::BOTTOM;
1503
1504  if (tj->base() == Type::AnyPtr)
1505    tj = TypePtr::BOTTOM;      // An error, which the caller must check for.
1506
1507  // Flatten all to bottom for now
1508  switch( _AliasLevel ) {
1509  case 0:
1510    tj = TypePtr::BOTTOM;
1511    break;
1512  case 1:                       // Flatten to: oop, static, field or array
1513    switch (tj->base()) {
1514    //case Type::AryPtr: tj = TypeAryPtr::RANGE;    break;
1515    case Type::RawPtr:   tj = TypeRawPtr::BOTTOM;   break;
1516    case Type::AryPtr:   // do not distinguish arrays at all
1517    case Type::InstPtr:  tj = TypeInstPtr::BOTTOM;  break;
1518    case Type::KlassPtr: tj = TypeKlassPtr::OBJECT; break;
1519    case Type::AnyPtr:   tj = TypePtr::BOTTOM;      break;  // caller checks it
1520    default: ShouldNotReachHere();
1521    }
1522    break;
1523  case 2:                       // No collapsing at level 2; keep all splits
1524  case 3:                       // No collapsing at level 3; keep all splits
1525    break;
1526  default:
1527    Unimplemented();
1528  }
1529
1530  offset = tj->offset();
1531  assert( offset != Type::OffsetTop, "Offset has fallen from constant" );
1532
1533  assert( (offset != Type::OffsetBot && tj->base() != Type::AryPtr) ||
1534          (offset == Type::OffsetBot && tj->base() == Type::AryPtr) ||
1535          (offset == Type::OffsetBot && tj == TypeOopPtr::BOTTOM) ||
1536          (offset == Type::OffsetBot && tj == TypePtr::BOTTOM) ||
1537          (offset == oopDesc::mark_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1538          (offset == oopDesc::klass_offset_in_bytes() && tj->base() == Type::AryPtr) ||
1539          (offset == arrayOopDesc::length_offset_in_bytes() && tj->base() == Type::AryPtr)  ,
1540          "For oops, klasses, raw offset must be constant; for arrays the offset is never known" );
1541  assert( tj->ptr() != TypePtr::TopPTR &&
1542          tj->ptr() != TypePtr::AnyNull &&
1543          tj->ptr() != TypePtr::Null, "No imprecise addresses" );
1544//    assert( tj->ptr() != TypePtr::Constant ||
1545//            tj->base() == Type::RawPtr ||
1546//            tj->base() == Type::KlassPtr, "No constant oop addresses" );
1547
1548  return tj;
1549}
1550
1551void Compile::AliasType::Init(int i, const TypePtr* at) {
1552  _index = i;
1553  _adr_type = at;
1554  _field = NULL;
1555  _element = NULL;
1556  _is_rewritable = true; // default
1557  const TypeOopPtr *atoop = (at != NULL) ? at->isa_oopptr() : NULL;
1558  if (atoop != NULL && atoop->is_known_instance()) {
1559    const TypeOopPtr *gt = atoop->cast_to_instance_id(TypeOopPtr::InstanceBot);
1560    _general_index = Compile::current()->get_alias_index(gt);
1561  } else {
1562    _general_index = 0;
1563  }
1564}
1565
1566//---------------------------------print_on------------------------------------
1567#ifndef PRODUCT
1568void Compile::AliasType::print_on(outputStream* st) {
1569  if (index() < 10)
1570        st->print("@ <%d> ", index());
1571  else  st->print("@ <%d>",  index());
1572  st->print(is_rewritable() ? "   " : " RO");
1573  int offset = adr_type()->offset();
1574  if (offset == Type::OffsetBot)
1575        st->print(" +any");
1576  else  st->print(" +%-3d", offset);
1577  st->print(" in ");
1578  adr_type()->dump_on(st);
1579  const TypeOopPtr* tjp = adr_type()->isa_oopptr();
1580  if (field() != NULL && tjp) {
1581    if (tjp->klass()  != field()->holder() ||
1582        tjp->offset() != field()->offset_in_bytes()) {
1583      st->print(" != ");
1584      field()->print();
1585      st->print(" ***");
1586    }
1587  }
1588}
1589
1590void print_alias_types() {
1591  Compile* C = Compile::current();
1592  tty->print_cr("--- Alias types, AliasIdxBot .. %d", C->num_alias_types()-1);
1593  for (int idx = Compile::AliasIdxBot; idx < C->num_alias_types(); idx++) {
1594    C->alias_type(idx)->print_on(tty);
1595    tty->cr();
1596  }
1597}
1598#endif
1599
1600
1601//----------------------------probe_alias_cache--------------------------------
1602Compile::AliasCacheEntry* Compile::probe_alias_cache(const TypePtr* adr_type) {
1603  intptr_t key = (intptr_t) adr_type;
1604  key ^= key >> logAliasCacheSize;
1605  return &_alias_cache[key & right_n_bits(logAliasCacheSize)];
1606}
1607
1608
1609//-----------------------------grow_alias_types--------------------------------
1610void Compile::grow_alias_types() {
1611  const int old_ats  = _max_alias_types; // how many before?
1612  const int new_ats  = old_ats;          // how many more?
1613  const int grow_ats = old_ats+new_ats;  // how many now?
1614  _max_alias_types = grow_ats;
1615  _alias_types =  REALLOC_ARENA_ARRAY(comp_arena(), AliasType*, _alias_types, old_ats, grow_ats);
1616  AliasType* ats =    NEW_ARENA_ARRAY(comp_arena(), AliasType, new_ats);
1617  Copy::zero_to_bytes(ats, sizeof(AliasType)*new_ats);
1618  for (int i = 0; i < new_ats; i++)  _alias_types[old_ats+i] = &ats[i];
1619}
1620
1621
1622//--------------------------------find_alias_type------------------------------
1623Compile::AliasType* Compile::find_alias_type(const TypePtr* adr_type, bool no_create, ciField* original_field) {
1624  if (_AliasLevel == 0)
1625    return alias_type(AliasIdxBot);
1626
1627  AliasCacheEntry* ace = probe_alias_cache(adr_type);
1628  if (ace->_adr_type == adr_type) {
1629    return alias_type(ace->_index);
1630  }
1631
1632  // Handle special cases.
1633  if (adr_type == NULL)             return alias_type(AliasIdxTop);
1634  if (adr_type == TypePtr::BOTTOM)  return alias_type(AliasIdxBot);
1635
1636  // Do it the slow way.
1637  const TypePtr* flat = flatten_alias_type(adr_type);
1638
1639#ifdef ASSERT
1640  assert(flat == flatten_alias_type(flat), "idempotent");
1641  assert(flat != TypePtr::BOTTOM,     "cannot alias-analyze an untyped ptr");
1642  if (flat->isa_oopptr() && !flat->isa_klassptr()) {
1643    const TypeOopPtr* foop = flat->is_oopptr();
1644    // Scalarizable allocations have exact klass always.
1645    bool exact = !foop->klass_is_exact() || foop->is_known_instance();
1646    const TypePtr* xoop = foop->cast_to_exactness(exact)->is_ptr();
1647    assert(foop == flatten_alias_type(xoop), "exactness must not affect alias type");
1648  }
1649  assert(flat == flatten_alias_type(flat), "exact bit doesn't matter");
1650#endif
1651
1652  int idx = AliasIdxTop;
1653  for (int i = 0; i < num_alias_types(); i++) {
1654    if (alias_type(i)->adr_type() == flat) {
1655      idx = i;
1656      break;
1657    }
1658  }
1659
1660  if (idx == AliasIdxTop) {
1661    if (no_create)  return NULL;
1662    // Grow the array if necessary.
1663    if (_num_alias_types == _max_alias_types)  grow_alias_types();
1664    // Add a new alias type.
1665    idx = _num_alias_types++;
1666    _alias_types[idx]->Init(idx, flat);
1667    if (flat == TypeInstPtr::KLASS)  alias_type(idx)->set_rewritable(false);
1668    if (flat == TypeAryPtr::RANGE)   alias_type(idx)->set_rewritable(false);
1669    if (flat->isa_instptr()) {
1670      if (flat->offset() == java_lang_Class::klass_offset_in_bytes()
1671          && flat->is_instptr()->klass() == env()->Class_klass())
1672        alias_type(idx)->set_rewritable(false);
1673    }
1674    if (flat->isa_aryptr()) {
1675#ifdef ASSERT
1676      const int header_size_min  = arrayOopDesc::base_offset_in_bytes(T_BYTE);
1677      // (T_BYTE has the weakest alignment and size restrictions...)
1678      assert(flat->offset() < header_size_min, "array body reference must be OffsetBot");
1679#endif
1680      if (flat->offset() == TypePtr::OffsetBot) {
1681        alias_type(idx)->set_element(flat->is_aryptr()->elem());
1682      }
1683    }
1684    if (flat->isa_klassptr()) {
1685      if (flat->offset() == in_bytes(Klass::super_check_offset_offset()))
1686        alias_type(idx)->set_rewritable(false);
1687      if (flat->offset() == in_bytes(Klass::modifier_flags_offset()))
1688        alias_type(idx)->set_rewritable(false);
1689      if (flat->offset() == in_bytes(Klass::access_flags_offset()))
1690        alias_type(idx)->set_rewritable(false);
1691      if (flat->offset() == in_bytes(Klass::java_mirror_offset()))
1692        alias_type(idx)->set_rewritable(false);
1693    }
1694    // %%% (We would like to finalize JavaThread::threadObj_offset(),
1695    // but the base pointer type is not distinctive enough to identify
1696    // references into JavaThread.)
1697
1698    // Check for final fields.
1699    const TypeInstPtr* tinst = flat->isa_instptr();
1700    if (tinst && tinst->offset() >= instanceOopDesc::base_offset_in_bytes()) {
1701      ciField* field;
1702      if (tinst->const_oop() != NULL &&
1703          tinst->klass() == ciEnv::current()->Class_klass() &&
1704          tinst->offset() >= (tinst->klass()->as_instance_klass()->size_helper() * wordSize)) {
1705        // static field
1706        ciInstanceKlass* k = tinst->const_oop()->as_instance()->java_lang_Class_klass()->as_instance_klass();
1707        field = k->get_field_by_offset(tinst->offset(), true);
1708      } else {
1709        ciInstanceKlass *k = tinst->klass()->as_instance_klass();
1710        field = k->get_field_by_offset(tinst->offset(), false);
1711      }
1712      assert(field == NULL ||
1713             original_field == NULL ||
1714             (field->holder() == original_field->holder() &&
1715              field->offset() == original_field->offset() &&
1716              field->is_static() == original_field->is_static()), "wrong field?");
1717      // Set field() and is_rewritable() attributes.
1718      if (field != NULL)  alias_type(idx)->set_field(field);
1719    }
1720  }
1721
1722  // Fill the cache for next time.
1723  ace->_adr_type = adr_type;
1724  ace->_index    = idx;
1725  assert(alias_type(adr_type) == alias_type(idx),  "type must be installed");
1726
1727  // Might as well try to fill the cache for the flattened version, too.
1728  AliasCacheEntry* face = probe_alias_cache(flat);
1729  if (face->_adr_type == NULL) {
1730    face->_adr_type = flat;
1731    face->_index    = idx;
1732    assert(alias_type(flat) == alias_type(idx), "flat type must work too");
1733  }
1734
1735  return alias_type(idx);
1736}
1737
1738
1739Compile::AliasType* Compile::alias_type(ciField* field) {
1740  const TypeOopPtr* t;
1741  if (field->is_static())
1742    t = TypeInstPtr::make(field->holder()->java_mirror());
1743  else
1744    t = TypeOopPtr::make_from_klass_raw(field->holder());
1745  AliasType* atp = alias_type(t->add_offset(field->offset_in_bytes()), field);
1746  assert((field->is_final() || field->is_stable()) == !atp->is_rewritable(), "must get the rewritable bits correct");
1747  return atp;
1748}
1749
1750
1751//------------------------------have_alias_type--------------------------------
1752bool Compile::have_alias_type(const TypePtr* adr_type) {
1753  AliasCacheEntry* ace = probe_alias_cache(adr_type);
1754  if (ace->_adr_type == adr_type) {
1755    return true;
1756  }
1757
1758  // Handle special cases.
1759  if (adr_type == NULL)             return true;
1760  if (adr_type == TypePtr::BOTTOM)  return true;
1761
1762  return find_alias_type(adr_type, true, NULL) != NULL;
1763}
1764
1765//-----------------------------must_alias--------------------------------------
1766// True if all values of the given address type are in the given alias category.
1767bool Compile::must_alias(const TypePtr* adr_type, int alias_idx) {
1768  if (alias_idx == AliasIdxBot)         return true;  // the universal category
1769  if (adr_type == NULL)                 return true;  // NULL serves as TypePtr::TOP
1770  if (alias_idx == AliasIdxTop)         return false; // the empty category
1771  if (adr_type->base() == Type::AnyPtr) return false; // TypePtr::BOTTOM or its twins
1772
1773  // the only remaining possible overlap is identity
1774  int adr_idx = get_alias_index(adr_type);
1775  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1776  assert(adr_idx == alias_idx ||
1777         (alias_type(alias_idx)->adr_type() != TypeOopPtr::BOTTOM
1778          && adr_type                       != TypeOopPtr::BOTTOM),
1779         "should not be testing for overlap with an unsafe pointer");
1780  return adr_idx == alias_idx;
1781}
1782
1783//------------------------------can_alias--------------------------------------
1784// True if any values of the given address type are in the given alias category.
1785bool Compile::can_alias(const TypePtr* adr_type, int alias_idx) {
1786  if (alias_idx == AliasIdxTop)         return false; // the empty category
1787  if (adr_type == NULL)                 return false; // NULL serves as TypePtr::TOP
1788  if (alias_idx == AliasIdxBot)         return true;  // the universal category
1789  if (adr_type->base() == Type::AnyPtr) return true;  // TypePtr::BOTTOM or its twins
1790
1791  // the only remaining possible overlap is identity
1792  int adr_idx = get_alias_index(adr_type);
1793  assert(adr_idx != AliasIdxBot && adr_idx != AliasIdxTop, "");
1794  return adr_idx == alias_idx;
1795}
1796
1797
1798
1799//---------------------------pop_warm_call-------------------------------------
1800WarmCallInfo* Compile::pop_warm_call() {
1801  WarmCallInfo* wci = _warm_calls;
1802  if (wci != NULL)  _warm_calls = wci->remove_from(wci);
1803  return wci;
1804}
1805
1806//----------------------------Inline_Warm--------------------------------------
1807int Compile::Inline_Warm() {
1808  // If there is room, try to inline some more warm call sites.
1809  // %%% Do a graph index compaction pass when we think we're out of space?
1810  if (!InlineWarmCalls)  return 0;
1811
1812  int calls_made_hot = 0;
1813  int room_to_grow   = NodeCountInliningCutoff - unique();
1814  int amount_to_grow = MIN2(room_to_grow, (int)NodeCountInliningStep);
1815  int amount_grown   = 0;
1816  WarmCallInfo* call;
1817  while (amount_to_grow > 0 && (call = pop_warm_call()) != NULL) {
1818    int est_size = (int)call->size();
1819    if (est_size > (room_to_grow - amount_grown)) {
1820      // This one won't fit anyway.  Get rid of it.
1821      call->make_cold();
1822      continue;
1823    }
1824    call->make_hot();
1825    calls_made_hot++;
1826    amount_grown   += est_size;
1827    amount_to_grow -= est_size;
1828  }
1829
1830  if (calls_made_hot > 0)  set_major_progress();
1831  return calls_made_hot;
1832}
1833
1834
1835//----------------------------Finish_Warm--------------------------------------
1836void Compile::Finish_Warm() {
1837  if (!InlineWarmCalls)  return;
1838  if (failing())  return;
1839  if (warm_calls() == NULL)  return;
1840
1841  // Clean up loose ends, if we are out of space for inlining.
1842  WarmCallInfo* call;
1843  while ((call = pop_warm_call()) != NULL) {
1844    call->make_cold();
1845  }
1846}
1847
1848//---------------------cleanup_loop_predicates-----------------------
1849// Remove the opaque nodes that protect the predicates so that all unused
1850// checks and uncommon_traps will be eliminated from the ideal graph
1851void Compile::cleanup_loop_predicates(PhaseIterGVN &igvn) {
1852  if (predicate_count()==0) return;
1853  for (int i = predicate_count(); i > 0; i--) {
1854    Node * n = predicate_opaque1_node(i-1);
1855    assert(n->Opcode() == Op_Opaque1, "must be");
1856    igvn.replace_node(n, n->in(1));
1857  }
1858  assert(predicate_count()==0, "should be clean!");
1859}
1860
1861// StringOpts and late inlining of string methods
1862void Compile::inline_string_calls(bool parse_time) {
1863  {
1864    // remove useless nodes to make the usage analysis simpler
1865    ResourceMark rm;
1866    PhaseRemoveUseless pru(initial_gvn(), for_igvn());
1867  }
1868
1869  {
1870    ResourceMark rm;
1871    print_method(PHASE_BEFORE_STRINGOPTS, 3);
1872    PhaseStringOpts pso(initial_gvn(), for_igvn());
1873    print_method(PHASE_AFTER_STRINGOPTS, 3);
1874  }
1875
1876  // now inline anything that we skipped the first time around
1877  if (!parse_time) {
1878    _late_inlines_pos = _late_inlines.length();
1879  }
1880
1881  while (_string_late_inlines.length() > 0) {
1882    CallGenerator* cg = _string_late_inlines.pop();
1883    cg->do_late_inline();
1884    if (failing())  return;
1885  }
1886  _string_late_inlines.trunc_to(0);
1887}
1888
1889// Late inlining of boxing methods
1890void Compile::inline_boxing_calls(PhaseIterGVN& igvn) {
1891  if (_boxing_late_inlines.length() > 0) {
1892    assert(has_boxed_value(), "inconsistent");
1893
1894    PhaseGVN* gvn = initial_gvn();
1895    set_inlining_incrementally(true);
1896
1897    assert( igvn._worklist.size() == 0, "should be done with igvn" );
1898    for_igvn()->clear();
1899    gvn->replace_with(&igvn);
1900
1901    _late_inlines_pos = _late_inlines.length();
1902
1903    while (_boxing_late_inlines.length() > 0) {
1904      CallGenerator* cg = _boxing_late_inlines.pop();
1905      cg->do_late_inline();
1906      if (failing())  return;
1907    }
1908    _boxing_late_inlines.trunc_to(0);
1909
1910    {
1911      ResourceMark rm;
1912      PhaseRemoveUseless pru(gvn, for_igvn());
1913    }
1914
1915    igvn = PhaseIterGVN(gvn);
1916    igvn.optimize();
1917
1918    set_inlining_progress(false);
1919    set_inlining_incrementally(false);
1920  }
1921}
1922
1923void Compile::inline_incrementally_one(PhaseIterGVN& igvn) {
1924  assert(IncrementalInline, "incremental inlining should be on");
1925  PhaseGVN* gvn = initial_gvn();
1926
1927  set_inlining_progress(false);
1928  for_igvn()->clear();
1929  gvn->replace_with(&igvn);
1930
1931  int i = 0;
1932
1933  for (; i <_late_inlines.length() && !inlining_progress(); i++) {
1934    CallGenerator* cg = _late_inlines.at(i);
1935    _late_inlines_pos = i+1;
1936    cg->do_late_inline();
1937    if (failing())  return;
1938  }
1939  int j = 0;
1940  for (; i < _late_inlines.length(); i++, j++) {
1941    _late_inlines.at_put(j, _late_inlines.at(i));
1942  }
1943  _late_inlines.trunc_to(j);
1944
1945  {
1946    ResourceMark rm;
1947    PhaseRemoveUseless pru(gvn, for_igvn());
1948  }
1949
1950  igvn = PhaseIterGVN(gvn);
1951}
1952
1953// Perform incremental inlining until bound on number of live nodes is reached
1954void Compile::inline_incrementally(PhaseIterGVN& igvn) {
1955  PhaseGVN* gvn = initial_gvn();
1956
1957  set_inlining_incrementally(true);
1958  set_inlining_progress(true);
1959  uint low_live_nodes = 0;
1960
1961  while(inlining_progress() && _late_inlines.length() > 0) {
1962
1963    if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
1964      if (low_live_nodes < (uint)LiveNodeCountInliningCutoff * 8 / 10) {
1965        // PhaseIdealLoop is expensive so we only try it once we are
1966        // out of live nodes and we only try it again if the previous
1967        // helped got the number of nodes down significantly
1968        PhaseIdealLoop ideal_loop( igvn, false, true );
1969        if (failing())  return;
1970        low_live_nodes = live_nodes();
1971        _major_progress = true;
1972      }
1973
1974      if (live_nodes() > (uint)LiveNodeCountInliningCutoff) {
1975        break;
1976      }
1977    }
1978
1979    inline_incrementally_one(igvn);
1980
1981    if (failing())  return;
1982
1983    igvn.optimize();
1984
1985    if (failing())  return;
1986  }
1987
1988  assert( igvn._worklist.size() == 0, "should be done with igvn" );
1989
1990  if (_string_late_inlines.length() > 0) {
1991    assert(has_stringbuilder(), "inconsistent");
1992    for_igvn()->clear();
1993    initial_gvn()->replace_with(&igvn);
1994
1995    inline_string_calls(false);
1996
1997    if (failing())  return;
1998
1999    {
2000      ResourceMark rm;
2001      PhaseRemoveUseless pru(initial_gvn(), for_igvn());
2002    }
2003
2004    igvn = PhaseIterGVN(gvn);
2005
2006    igvn.optimize();
2007  }
2008
2009  set_inlining_incrementally(false);
2010}
2011
2012
2013//------------------------------Optimize---------------------------------------
2014// Given a graph, optimize it.
2015void Compile::Optimize() {
2016  TracePhase t1("optimizer", &_t_optimizer, true);
2017
2018#ifndef PRODUCT
2019  if (env()->break_at_compile()) {
2020    BREAKPOINT;
2021  }
2022
2023#endif
2024
2025  ResourceMark rm;
2026  int          loop_opts_cnt;
2027
2028  print_inlining_reinit();
2029
2030  NOT_PRODUCT( verify_graph_edges(); )
2031
2032  print_method(PHASE_AFTER_PARSING);
2033
2034 {
2035  // Iterative Global Value Numbering, including ideal transforms
2036  // Initialize IterGVN with types and values from parse-time GVN
2037  PhaseIterGVN igvn(initial_gvn());
2038  {
2039    NOT_PRODUCT( TracePhase t2("iterGVN", &_t_iterGVN, TimeCompiler); )
2040    igvn.optimize();
2041  }
2042
2043  print_method(PHASE_ITER_GVN1, 2);
2044
2045  if (failing())  return;
2046
2047  {
2048    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
2049    inline_incrementally(igvn);
2050  }
2051
2052  print_method(PHASE_INCREMENTAL_INLINE, 2);
2053
2054  if (failing())  return;
2055
2056  if (eliminate_boxing()) {
2057    NOT_PRODUCT( TracePhase t2("incrementalInline", &_t_incrInline, TimeCompiler); )
2058    // Inline valueOf() methods now.
2059    inline_boxing_calls(igvn);
2060
2061    if (AlwaysIncrementalInline) {
2062      inline_incrementally(igvn);
2063    }
2064
2065    print_method(PHASE_INCREMENTAL_BOXING_INLINE, 2);
2066
2067    if (failing())  return;
2068  }
2069
2070  // Remove the speculative part of types and clean up the graph from
2071  // the extra CastPP nodes whose only purpose is to carry them. Do
2072  // that early so that optimizations are not disrupted by the extra
2073  // CastPP nodes.
2074  remove_speculative_types(igvn);
2075
2076  // No more new expensive nodes will be added to the list from here
2077  // so keep only the actual candidates for optimizations.
2078  cleanup_expensive_nodes(igvn);
2079
2080  // Perform escape analysis
2081  if (_do_escape_analysis && ConnectionGraph::has_candidates(this)) {
2082    if (has_loops()) {
2083      // Cleanup graph (remove dead nodes).
2084      TracePhase t2("idealLoop", &_t_idealLoop, true);
2085      PhaseIdealLoop ideal_loop( igvn, false, true );
2086      if (major_progress()) print_method(PHASE_PHASEIDEAL_BEFORE_EA, 2);
2087      if (failing())  return;
2088    }
2089    ConnectionGraph::do_analysis(this, &igvn);
2090
2091    if (failing())  return;
2092
2093    // Optimize out fields loads from scalar replaceable allocations.
2094    igvn.optimize();
2095    print_method(PHASE_ITER_GVN_AFTER_EA, 2);
2096
2097    if (failing())  return;
2098
2099    if (congraph() != NULL && macro_count() > 0) {
2100      NOT_PRODUCT( TracePhase t2("macroEliminate", &_t_macroEliminate, TimeCompiler); )
2101      PhaseMacroExpand mexp(igvn);
2102      mexp.eliminate_macro_nodes();
2103      igvn.set_delay_transform(false);
2104
2105      igvn.optimize();
2106      print_method(PHASE_ITER_GVN_AFTER_ELIMINATION, 2);
2107
2108      if (failing())  return;
2109    }
2110  }
2111
2112  // Loop transforms on the ideal graph.  Range Check Elimination,
2113  // peeling, unrolling, etc.
2114
2115  // Set loop opts counter
2116  loop_opts_cnt = num_loop_opts();
2117  if((loop_opts_cnt > 0) && (has_loops() || has_split_ifs())) {
2118    {
2119      TracePhase t2("idealLoop", &_t_idealLoop, true);
2120      PhaseIdealLoop ideal_loop( igvn, true );
2121      loop_opts_cnt--;
2122      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP1, 2);
2123      if (failing())  return;
2124    }
2125    // Loop opts pass if partial peeling occurred in previous pass
2126    if(PartialPeelLoop && major_progress() && (loop_opts_cnt > 0)) {
2127      TracePhase t3("idealLoop", &_t_idealLoop, true);
2128      PhaseIdealLoop ideal_loop( igvn, false );
2129      loop_opts_cnt--;
2130      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP2, 2);
2131      if (failing())  return;
2132    }
2133    // Loop opts pass for loop-unrolling before CCP
2134    if(major_progress() && (loop_opts_cnt > 0)) {
2135      TracePhase t4("idealLoop", &_t_idealLoop, true);
2136      PhaseIdealLoop ideal_loop( igvn, false );
2137      loop_opts_cnt--;
2138      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP3, 2);
2139    }
2140    if (!failing()) {
2141      // Verify that last round of loop opts produced a valid graph
2142      NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
2143      PhaseIdealLoop::verify(igvn);
2144    }
2145  }
2146  if (failing())  return;
2147
2148  // Conditional Constant Propagation;
2149  PhaseCCP ccp( &igvn );
2150  assert( true, "Break here to ccp.dump_nodes_and_types(_root,999,1)");
2151  {
2152    TracePhase t2("ccp", &_t_ccp, true);
2153    ccp.do_transform();
2154  }
2155  print_method(PHASE_CPP1, 2);
2156
2157  assert( true, "Break here to ccp.dump_old2new_map()");
2158
2159  // Iterative Global Value Numbering, including ideal transforms
2160  {
2161    NOT_PRODUCT( TracePhase t2("iterGVN2", &_t_iterGVN2, TimeCompiler); )
2162    igvn = ccp;
2163    igvn.optimize();
2164  }
2165
2166  print_method(PHASE_ITER_GVN2, 2);
2167
2168  if (failing())  return;
2169
2170  // Loop transforms on the ideal graph.  Range Check Elimination,
2171  // peeling, unrolling, etc.
2172  if(loop_opts_cnt > 0) {
2173    debug_only( int cnt = 0; );
2174    while(major_progress() && (loop_opts_cnt > 0)) {
2175      TracePhase t2("idealLoop", &_t_idealLoop, true);
2176      assert( cnt++ < 40, "infinite cycle in loop optimization" );
2177      PhaseIdealLoop ideal_loop( igvn, true);
2178      loop_opts_cnt--;
2179      if (major_progress()) print_method(PHASE_PHASEIDEALLOOP_ITERATIONS, 2);
2180      if (failing())  return;
2181    }
2182  }
2183
2184  {
2185    // Verify that all previous optimizations produced a valid graph
2186    // at least to this point, even if no loop optimizations were done.
2187    NOT_PRODUCT( TracePhase t2("idealLoopVerify", &_t_idealLoopVerify, TimeCompiler); )
2188    PhaseIdealLoop::verify(igvn);
2189  }
2190
2191  {
2192    NOT_PRODUCT( TracePhase t2("macroExpand", &_t_macroExpand, TimeCompiler); )
2193    PhaseMacroExpand  mex(igvn);
2194    if (mex.expand_macro_nodes()) {
2195      assert(failing(), "must bail out w/ explicit message");
2196      return;
2197    }
2198  }
2199
2200 } // (End scope of igvn; run destructor if necessary for asserts.)
2201
2202  process_print_inlining();
2203  // A method with only infinite loops has no edges entering loops from root
2204  {
2205    NOT_PRODUCT( TracePhase t2("graphReshape", &_t_graphReshaping, TimeCompiler); )
2206    if (final_graph_reshaping()) {
2207      assert(failing(), "must bail out w/ explicit message");
2208      return;
2209    }
2210  }
2211
2212  print_method(PHASE_OPTIMIZE_FINISHED, 2);
2213}
2214
2215
2216//------------------------------Code_Gen---------------------------------------
2217// Given a graph, generate code for it
2218void Compile::Code_Gen() {
2219  if (failing()) {
2220    return;
2221  }
2222
2223  // Perform instruction selection.  You might think we could reclaim Matcher
2224  // memory PDQ, but actually the Matcher is used in generating spill code.
2225  // Internals of the Matcher (including some VectorSets) must remain live
2226  // for awhile - thus I cannot reclaim Matcher memory lest a VectorSet usage
2227  // set a bit in reclaimed memory.
2228
2229  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
2230  // nodes.  Mapping is only valid at the root of each matched subtree.
2231  NOT_PRODUCT( verify_graph_edges(); )
2232
2233  Matcher matcher;
2234  _matcher = &matcher;
2235  {
2236    TracePhase t2("matcher", &_t_matcher, true);
2237    matcher.match();
2238  }
2239  // In debug mode can dump m._nodes.dump() for mapping of ideal to machine
2240  // nodes.  Mapping is only valid at the root of each matched subtree.
2241  NOT_PRODUCT( verify_graph_edges(); )
2242
2243  // If you have too many nodes, or if matching has failed, bail out
2244  check_node_count(0, "out of nodes matching instructions");
2245  if (failing()) {
2246    return;
2247  }
2248
2249  // Build a proper-looking CFG
2250  PhaseCFG cfg(node_arena(), root(), matcher);
2251  _cfg = &cfg;
2252  {
2253    NOT_PRODUCT( TracePhase t2("scheduler", &_t_scheduler, TimeCompiler); )
2254    bool success = cfg.do_global_code_motion();
2255    if (!success) {
2256      return;
2257    }
2258
2259    print_method(PHASE_GLOBAL_CODE_MOTION, 2);
2260    NOT_PRODUCT( verify_graph_edges(); )
2261    debug_only( cfg.verify(); )
2262  }
2263
2264  PhaseChaitin regalloc(unique(), cfg, matcher);
2265  _regalloc = &regalloc;
2266  {
2267    TracePhase t2("regalloc", &_t_registerAllocation, true);
2268    // Perform register allocation.  After Chaitin, use-def chains are
2269    // no longer accurate (at spill code) and so must be ignored.
2270    // Node->LRG->reg mappings are still accurate.
2271    _regalloc->Register_Allocate();
2272
2273    // Bail out if the allocator builds too many nodes
2274    if (failing()) {
2275      return;
2276    }
2277  }
2278
2279  // Prior to register allocation we kept empty basic blocks in case the
2280  // the allocator needed a place to spill.  After register allocation we
2281  // are not adding any new instructions.  If any basic block is empty, we
2282  // can now safely remove it.
2283  {
2284    NOT_PRODUCT( TracePhase t2("blockOrdering", &_t_blockOrdering, TimeCompiler); )
2285    cfg.remove_empty_blocks();
2286    if (do_freq_based_layout()) {
2287      PhaseBlockLayout layout(cfg);
2288    } else {
2289      cfg.set_loop_alignment();
2290    }
2291    cfg.fixup_flow();
2292  }
2293
2294  // Apply peephole optimizations
2295  if( OptoPeephole ) {
2296    NOT_PRODUCT( TracePhase t2("peephole", &_t_peephole, TimeCompiler); )
2297    PhasePeephole peep( _regalloc, cfg);
2298    peep.do_transform();
2299  }
2300
2301  // Do late expand if CPU requires this.
2302  if (Matcher::require_postalloc_expand) {
2303    NOT_PRODUCT(TracePhase t2c("postalloc_expand", &_t_postalloc_expand, true));
2304    cfg.postalloc_expand(_regalloc);
2305  }
2306
2307  // Convert Nodes to instruction bits in a buffer
2308  {
2309    // %%%% workspace merge brought two timers together for one job
2310    TracePhase t2a("output", &_t_output, true);
2311    NOT_PRODUCT( TraceTime t2b(NULL, &_t_codeGeneration, TimeCompiler, false); )
2312    Output();
2313  }
2314
2315  print_method(PHASE_FINAL_CODE);
2316
2317  // He's dead, Jim.
2318  _cfg     = (PhaseCFG*)0xdeadbeef;
2319  _regalloc = (PhaseChaitin*)0xdeadbeef;
2320}
2321
2322
2323//------------------------------dump_asm---------------------------------------
2324// Dump formatted assembly
2325#ifndef PRODUCT
2326void Compile::dump_asm(int *pcs, uint pc_limit) {
2327  bool cut_short = false;
2328  tty->print_cr("#");
2329  tty->print("#  ");  _tf->dump();  tty->cr();
2330  tty->print_cr("#");
2331
2332  // For all blocks
2333  int pc = 0x0;                 // Program counter
2334  char starts_bundle = ' ';
2335  _regalloc->dump_frame();
2336
2337  Node *n = NULL;
2338  for (uint i = 0; i < _cfg->number_of_blocks(); i++) {
2339    if (VMThread::should_terminate()) {
2340      cut_short = true;
2341      break;
2342    }
2343    Block* block = _cfg->get_block(i);
2344    if (block->is_connector() && !Verbose) {
2345      continue;
2346    }
2347    n = block->head();
2348    if (pcs && n->_idx < pc_limit) {
2349      tty->print("%3.3x   ", pcs[n->_idx]);
2350    } else {
2351      tty->print("      ");
2352    }
2353    block->dump_head(_cfg);
2354    if (block->is_connector()) {
2355      tty->print_cr("        # Empty connector block");
2356    } else if (block->num_preds() == 2 && block->pred(1)->is_CatchProj() && block->pred(1)->as_CatchProj()->_con == CatchProjNode::fall_through_index) {
2357      tty->print_cr("        # Block is sole successor of call");
2358    }
2359
2360    // For all instructions
2361    Node *delay = NULL;
2362    for (uint j = 0; j < block->number_of_nodes(); j++) {
2363      if (VMThread::should_terminate()) {
2364        cut_short = true;
2365        break;
2366      }
2367      n = block->get_node(j);
2368      if (valid_bundle_info(n)) {
2369        Bundle* bundle = node_bundling(n);
2370        if (bundle->used_in_unconditional_delay()) {
2371          delay = n;
2372          continue;
2373        }
2374        if (bundle->starts_bundle()) {
2375          starts_bundle = '+';
2376        }
2377      }
2378
2379      if (WizardMode) {
2380        n->dump();
2381      }
2382
2383      if( !n->is_Region() &&    // Dont print in the Assembly
2384          !n->is_Phi() &&       // a few noisely useless nodes
2385          !n->is_Proj() &&
2386          !n->is_MachTemp() &&
2387          !n->is_SafePointScalarObject() &&
2388          !n->is_Catch() &&     // Would be nice to print exception table targets
2389          !n->is_MergeMem() &&  // Not very interesting
2390          !n->is_top() &&       // Debug info table constants
2391          !(n->is_Con() && !n->is_Mach())// Debug info table constants
2392          ) {
2393        if (pcs && n->_idx < pc_limit)
2394          tty->print("%3.3x", pcs[n->_idx]);
2395        else
2396          tty->print("   ");
2397        tty->print(" %c ", starts_bundle);
2398        starts_bundle = ' ';
2399        tty->print("\t");
2400        n->format(_regalloc, tty);
2401        tty->cr();
2402      }
2403
2404      // If we have an instruction with a delay slot, and have seen a delay,
2405      // then back up and print it
2406      if (valid_bundle_info(n) && node_bundling(n)->use_unconditional_delay()) {
2407        assert(delay != NULL, "no unconditional delay instruction");
2408        if (WizardMode) delay->dump();
2409
2410        if (node_bundling(delay)->starts_bundle())
2411          starts_bundle = '+';
2412        if (pcs && n->_idx < pc_limit)
2413          tty->print("%3.3x", pcs[n->_idx]);
2414        else
2415          tty->print("   ");
2416        tty->print(" %c ", starts_bundle);
2417        starts_bundle = ' ';
2418        tty->print("\t");
2419        delay->format(_regalloc, tty);
2420        tty->cr();
2421        delay = NULL;
2422      }
2423
2424      // Dump the exception table as well
2425      if( n->is_Catch() && (Verbose || WizardMode) ) {
2426        // Print the exception table for this offset
2427        _handler_table.print_subtable_for(pc);
2428      }
2429    }
2430
2431    if (pcs && n->_idx < pc_limit)
2432      tty->print_cr("%3.3x", pcs[n->_idx]);
2433    else
2434      tty->cr();
2435
2436    assert(cut_short || delay == NULL, "no unconditional delay branch");
2437
2438  } // End of per-block dump
2439  tty->cr();
2440
2441  if (cut_short)  tty->print_cr("*** disassembly is cut short ***");
2442}
2443#endif
2444
2445//------------------------------Final_Reshape_Counts---------------------------
2446// This class defines counters to help identify when a method
2447// may/must be executed using hardware with only 24-bit precision.
2448struct Final_Reshape_Counts : public StackObj {
2449  int  _call_count;             // count non-inlined 'common' calls
2450  int  _float_count;            // count float ops requiring 24-bit precision
2451  int  _double_count;           // count double ops requiring more precision
2452  int  _java_call_count;        // count non-inlined 'java' calls
2453  int  _inner_loop_count;       // count loops which need alignment
2454  VectorSet _visited;           // Visitation flags
2455  Node_List _tests;             // Set of IfNodes & PCTableNodes
2456
2457  Final_Reshape_Counts() :
2458    _call_count(0), _float_count(0), _double_count(0),
2459    _java_call_count(0), _inner_loop_count(0),
2460    _visited( Thread::current()->resource_area() ) { }
2461
2462  void inc_call_count  () { _call_count  ++; }
2463  void inc_float_count () { _float_count ++; }
2464  void inc_double_count() { _double_count++; }
2465  void inc_java_call_count() { _java_call_count++; }
2466  void inc_inner_loop_count() { _inner_loop_count++; }
2467
2468  int  get_call_count  () const { return _call_count  ; }
2469  int  get_float_count () const { return _float_count ; }
2470  int  get_double_count() const { return _double_count; }
2471  int  get_java_call_count() const { return _java_call_count; }
2472  int  get_inner_loop_count() const { return _inner_loop_count; }
2473};
2474
2475#ifdef ASSERT
2476static bool oop_offset_is_sane(const TypeInstPtr* tp) {
2477  ciInstanceKlass *k = tp->klass()->as_instance_klass();
2478  // Make sure the offset goes inside the instance layout.
2479  return k->contains_field_offset(tp->offset());
2480  // Note that OffsetBot and OffsetTop are very negative.
2481}
2482#endif
2483
2484// Eliminate trivially redundant StoreCMs and accumulate their
2485// precedence edges.
2486void Compile::eliminate_redundant_card_marks(Node* n) {
2487  assert(n->Opcode() == Op_StoreCM, "expected StoreCM");
2488  if (n->in(MemNode::Address)->outcnt() > 1) {
2489    // There are multiple users of the same address so it might be
2490    // possible to eliminate some of the StoreCMs
2491    Node* mem = n->in(MemNode::Memory);
2492    Node* adr = n->in(MemNode::Address);
2493    Node* val = n->in(MemNode::ValueIn);
2494    Node* prev = n;
2495    bool done = false;
2496    // Walk the chain of StoreCMs eliminating ones that match.  As
2497    // long as it's a chain of single users then the optimization is
2498    // safe.  Eliminating partially redundant StoreCMs would require
2499    // cloning copies down the other paths.
2500    while (mem->Opcode() == Op_StoreCM && mem->outcnt() == 1 && !done) {
2501      if (adr == mem->in(MemNode::Address) &&
2502          val == mem->in(MemNode::ValueIn)) {
2503        // redundant StoreCM
2504        if (mem->req() > MemNode::OopStore) {
2505          // Hasn't been processed by this code yet.
2506          n->add_prec(mem->in(MemNode::OopStore));
2507        } else {
2508          // Already converted to precedence edge
2509          for (uint i = mem->req(); i < mem->len(); i++) {
2510            // Accumulate any precedence edges
2511            if (mem->in(i) != NULL) {
2512              n->add_prec(mem->in(i));
2513            }
2514          }
2515          // Everything above this point has been processed.
2516          done = true;
2517        }
2518        // Eliminate the previous StoreCM
2519        prev->set_req(MemNode::Memory, mem->in(MemNode::Memory));
2520        assert(mem->outcnt() == 0, "should be dead");
2521        mem->disconnect_inputs(NULL, this);
2522      } else {
2523        prev = mem;
2524      }
2525      mem = prev->in(MemNode::Memory);
2526    }
2527  }
2528}
2529
2530//------------------------------final_graph_reshaping_impl----------------------
2531// Implement items 1-5 from final_graph_reshaping below.
2532void Compile::final_graph_reshaping_impl( Node *n, Final_Reshape_Counts &frc) {
2533
2534  if ( n->outcnt() == 0 ) return; // dead node
2535  uint nop = n->Opcode();
2536
2537  // Check for 2-input instruction with "last use" on right input.
2538  // Swap to left input.  Implements item (2).
2539  if( n->req() == 3 &&          // two-input instruction
2540      n->in(1)->outcnt() > 1 && // left use is NOT a last use
2541      (!n->in(1)->is_Phi() || n->in(1)->in(2) != n) && // it is not data loop
2542      n->in(2)->outcnt() == 1 &&// right use IS a last use
2543      !n->in(2)->is_Con() ) {   // right use is not a constant
2544    // Check for commutative opcode
2545    switch( nop ) {
2546    case Op_AddI:  case Op_AddF:  case Op_AddD:  case Op_AddL:
2547    case Op_MaxI:  case Op_MinI:
2548    case Op_MulI:  case Op_MulF:  case Op_MulD:  case Op_MulL:
2549    case Op_AndL:  case Op_XorL:  case Op_OrL:
2550    case Op_AndI:  case Op_XorI:  case Op_OrI: {
2551      // Move "last use" input to left by swapping inputs
2552      n->swap_edges(1, 2);
2553      break;
2554    }
2555    default:
2556      break;
2557    }
2558  }
2559
2560#ifdef ASSERT
2561  if( n->is_Mem() ) {
2562    int alias_idx = get_alias_index(n->as_Mem()->adr_type());
2563    assert( n->in(0) != NULL || alias_idx != Compile::AliasIdxRaw ||
2564            // oop will be recorded in oop map if load crosses safepoint
2565            n->is_Load() && (n->as_Load()->bottom_type()->isa_oopptr() ||
2566                             LoadNode::is_immutable_value(n->in(MemNode::Address))),
2567            "raw memory operations should have control edge");
2568  }
2569#endif
2570  // Count FPU ops and common calls, implements item (3)
2571  switch( nop ) {
2572  // Count all float operations that may use FPU
2573  case Op_AddF:
2574  case Op_SubF:
2575  case Op_MulF:
2576  case Op_DivF:
2577  case Op_NegF:
2578  case Op_ModF:
2579  case Op_ConvI2F:
2580  case Op_ConF:
2581  case Op_CmpF:
2582  case Op_CmpF3:
2583  // case Op_ConvL2F: // longs are split into 32-bit halves
2584    frc.inc_float_count();
2585    break;
2586
2587  case Op_ConvF2D:
2588  case Op_ConvD2F:
2589    frc.inc_float_count();
2590    frc.inc_double_count();
2591    break;
2592
2593  // Count all double operations that may use FPU
2594  case Op_AddD:
2595  case Op_SubD:
2596  case Op_MulD:
2597  case Op_DivD:
2598  case Op_NegD:
2599  case Op_ModD:
2600  case Op_ConvI2D:
2601  case Op_ConvD2I:
2602  // case Op_ConvL2D: // handled by leaf call
2603  // case Op_ConvD2L: // handled by leaf call
2604  case Op_ConD:
2605  case Op_CmpD:
2606  case Op_CmpD3:
2607    frc.inc_double_count();
2608    break;
2609  case Op_Opaque1:              // Remove Opaque Nodes before matching
2610  case Op_Opaque2:              // Remove Opaque Nodes before matching
2611  case Op_Opaque3:
2612    n->subsume_by(n->in(1), this);
2613    break;
2614  case Op_CallStaticJava:
2615  case Op_CallJava:
2616  case Op_CallDynamicJava:
2617    frc.inc_java_call_count(); // Count java call site;
2618  case Op_CallRuntime:
2619  case Op_CallLeaf:
2620  case Op_CallLeafNoFP: {
2621    assert( n->is_Call(), "" );
2622    CallNode *call = n->as_Call();
2623    // Count call sites where the FP mode bit would have to be flipped.
2624    // Do not count uncommon runtime calls:
2625    // uncommon_trap, _complete_monitor_locking, _complete_monitor_unlocking,
2626    // _new_Java, _new_typeArray, _new_objArray, _rethrow_Java, ...
2627    if( !call->is_CallStaticJava() || !call->as_CallStaticJava()->_name ) {
2628      frc.inc_call_count();   // Count the call site
2629    } else {                  // See if uncommon argument is shared
2630      Node *n = call->in(TypeFunc::Parms);
2631      int nop = n->Opcode();
2632      // Clone shared simple arguments to uncommon calls, item (1).
2633      if( n->outcnt() > 1 &&
2634          !n->is_Proj() &&
2635          nop != Op_CreateEx &&
2636          nop != Op_CheckCastPP &&
2637          nop != Op_DecodeN &&
2638          nop != Op_DecodeNKlass &&
2639          !n->is_Mem() ) {
2640        Node *x = n->clone();
2641        call->set_req( TypeFunc::Parms, x );
2642      }
2643    }
2644    break;
2645  }
2646
2647  case Op_StoreD:
2648  case Op_LoadD:
2649  case Op_LoadD_unaligned:
2650    frc.inc_double_count();
2651    goto handle_mem;
2652  case Op_StoreF:
2653  case Op_LoadF:
2654    frc.inc_float_count();
2655    goto handle_mem;
2656
2657  case Op_StoreCM:
2658    {
2659      // Convert OopStore dependence into precedence edge
2660      Node* prec = n->in(MemNode::OopStore);
2661      n->del_req(MemNode::OopStore);
2662      n->add_prec(prec);
2663      eliminate_redundant_card_marks(n);
2664    }
2665
2666    // fall through
2667
2668  case Op_StoreB:
2669  case Op_StoreC:
2670  case Op_StorePConditional:
2671  case Op_StoreI:
2672  case Op_StoreL:
2673  case Op_StoreIConditional:
2674  case Op_StoreLConditional:
2675  case Op_CompareAndSwapI:
2676  case Op_CompareAndSwapL:
2677  case Op_CompareAndSwapP:
2678  case Op_CompareAndSwapN:
2679  case Op_GetAndAddI:
2680  case Op_GetAndAddL:
2681  case Op_GetAndSetI:
2682  case Op_GetAndSetL:
2683  case Op_GetAndSetP:
2684  case Op_GetAndSetN:
2685  case Op_StoreP:
2686  case Op_StoreN:
2687  case Op_StoreNKlass:
2688  case Op_LoadB:
2689  case Op_LoadUB:
2690  case Op_LoadUS:
2691  case Op_LoadI:
2692  case Op_LoadKlass:
2693  case Op_LoadNKlass:
2694  case Op_LoadL:
2695  case Op_LoadL_unaligned:
2696  case Op_LoadPLocked:
2697  case Op_LoadP:
2698  case Op_LoadN:
2699  case Op_LoadRange:
2700  case Op_LoadS: {
2701  handle_mem:
2702#ifdef ASSERT
2703    if( VerifyOptoOopOffsets ) {
2704      assert( n->is_Mem(), "" );
2705      MemNode *mem  = (MemNode*)n;
2706      // Check to see if address types have grounded out somehow.
2707      const TypeInstPtr *tp = mem->in(MemNode::Address)->bottom_type()->isa_instptr();
2708      assert( !tp || oop_offset_is_sane(tp), "" );
2709    }
2710#endif
2711    break;
2712  }
2713
2714  case Op_AddP: {               // Assert sane base pointers
2715    Node *addp = n->in(AddPNode::Address);
2716    assert( !addp->is_AddP() ||
2717            addp->in(AddPNode::Base)->is_top() || // Top OK for allocation
2718            addp->in(AddPNode::Base) == n->in(AddPNode::Base),
2719            "Base pointers must match" );
2720#ifdef _LP64
2721    if ((UseCompressedOops || UseCompressedClassPointers) &&
2722        addp->Opcode() == Op_ConP &&
2723        addp == n->in(AddPNode::Base) &&
2724        n->in(AddPNode::Offset)->is_Con()) {
2725      // Use addressing with narrow klass to load with offset on x86.
2726      // On sparc loading 32-bits constant and decoding it have less
2727      // instructions (4) then load 64-bits constant (7).
2728      // Do this transformation here since IGVN will convert ConN back to ConP.
2729      const Type* t = addp->bottom_type();
2730      if (t->isa_oopptr() || t->isa_klassptr()) {
2731        Node* nn = NULL;
2732
2733        int op = t->isa_oopptr() ? Op_ConN : Op_ConNKlass;
2734
2735        // Look for existing ConN node of the same exact type.
2736        Node* r  = root();
2737        uint cnt = r->outcnt();
2738        for (uint i = 0; i < cnt; i++) {
2739          Node* m = r->raw_out(i);
2740          if (m!= NULL && m->Opcode() == op &&
2741              m->bottom_type()->make_ptr() == t) {
2742            nn = m;
2743            break;
2744          }
2745        }
2746        if (nn != NULL) {
2747          // Decode a narrow oop to match address
2748          // [R12 + narrow_oop_reg<<3 + offset]
2749          if (t->isa_oopptr()) {
2750            nn = new DecodeNNode(nn, t);
2751          } else {
2752            nn = new DecodeNKlassNode(nn, t);
2753          }
2754          n->set_req(AddPNode::Base, nn);
2755          n->set_req(AddPNode::Address, nn);
2756          if (addp->outcnt() == 0) {
2757            addp->disconnect_inputs(NULL, this);
2758          }
2759        }
2760      }
2761    }
2762#endif
2763    break;
2764  }
2765
2766#ifdef _LP64
2767  case Op_CastPP:
2768    if (n->in(1)->is_DecodeN() && Matcher::gen_narrow_oop_implicit_null_checks()) {
2769      Node* in1 = n->in(1);
2770      const Type* t = n->bottom_type();
2771      Node* new_in1 = in1->clone();
2772      new_in1->as_DecodeN()->set_type(t);
2773
2774      if (!Matcher::narrow_oop_use_complex_address()) {
2775        //
2776        // x86, ARM and friends can handle 2 adds in addressing mode
2777        // and Matcher can fold a DecodeN node into address by using
2778        // a narrow oop directly and do implicit NULL check in address:
2779        //
2780        // [R12 + narrow_oop_reg<<3 + offset]
2781        // NullCheck narrow_oop_reg
2782        //
2783        // On other platforms (Sparc) we have to keep new DecodeN node and
2784        // use it to do implicit NULL check in address:
2785        //
2786        // decode_not_null narrow_oop_reg, base_reg
2787        // [base_reg + offset]
2788        // NullCheck base_reg
2789        //
2790        // Pin the new DecodeN node to non-null path on these platform (Sparc)
2791        // to keep the information to which NULL check the new DecodeN node
2792        // corresponds to use it as value in implicit_null_check().
2793        //
2794        new_in1->set_req(0, n->in(0));
2795      }
2796
2797      n->subsume_by(new_in1, this);
2798      if (in1->outcnt() == 0) {
2799        in1->disconnect_inputs(NULL, this);
2800      }
2801    }
2802    break;
2803
2804  case Op_CmpP:
2805    // Do this transformation here to preserve CmpPNode::sub() and
2806    // other TypePtr related Ideal optimizations (for example, ptr nullness).
2807    if (n->in(1)->is_DecodeNarrowPtr() || n->in(2)->is_DecodeNarrowPtr()) {
2808      Node* in1 = n->in(1);
2809      Node* in2 = n->in(2);
2810      if (!in1->is_DecodeNarrowPtr()) {
2811        in2 = in1;
2812        in1 = n->in(2);
2813      }
2814      assert(in1->is_DecodeNarrowPtr(), "sanity");
2815
2816      Node* new_in2 = NULL;
2817      if (in2->is_DecodeNarrowPtr()) {
2818        assert(in2->Opcode() == in1->Opcode(), "must be same node type");
2819        new_in2 = in2->in(1);
2820      } else if (in2->Opcode() == Op_ConP) {
2821        const Type* t = in2->bottom_type();
2822        if (t == TypePtr::NULL_PTR) {
2823          assert(in1->is_DecodeN(), "compare klass to null?");
2824          // Don't convert CmpP null check into CmpN if compressed
2825          // oops implicit null check is not generated.
2826          // This will allow to generate normal oop implicit null check.
2827          if (Matcher::gen_narrow_oop_implicit_null_checks())
2828            new_in2 = ConNode::make(this, TypeNarrowOop::NULL_PTR);
2829          //
2830          // This transformation together with CastPP transformation above
2831          // will generated code for implicit NULL checks for compressed oops.
2832          //
2833          // The original code after Optimize()
2834          //
2835          //    LoadN memory, narrow_oop_reg
2836          //    decode narrow_oop_reg, base_reg
2837          //    CmpP base_reg, NULL
2838          //    CastPP base_reg // NotNull
2839          //    Load [base_reg + offset], val_reg
2840          //
2841          // after these transformations will be
2842          //
2843          //    LoadN memory, narrow_oop_reg
2844          //    CmpN narrow_oop_reg, NULL
2845          //    decode_not_null narrow_oop_reg, base_reg
2846          //    Load [base_reg + offset], val_reg
2847          //
2848          // and the uncommon path (== NULL) will use narrow_oop_reg directly
2849          // since narrow oops can be used in debug info now (see the code in
2850          // final_graph_reshaping_walk()).
2851          //
2852          // At the end the code will be matched to
2853          // on x86:
2854          //
2855          //    Load_narrow_oop memory, narrow_oop_reg
2856          //    Load [R12 + narrow_oop_reg<<3 + offset], val_reg
2857          //    NullCheck narrow_oop_reg
2858          //
2859          // and on sparc:
2860          //
2861          //    Load_narrow_oop memory, narrow_oop_reg
2862          //    decode_not_null narrow_oop_reg, base_reg
2863          //    Load [base_reg + offset], val_reg
2864          //    NullCheck base_reg
2865          //
2866        } else if (t->isa_oopptr()) {
2867          new_in2 = ConNode::make(this, t->make_narrowoop());
2868        } else if (t->isa_klassptr()) {
2869          new_in2 = ConNode::make(this, t->make_narrowklass());
2870        }
2871      }
2872      if (new_in2 != NULL) {
2873        Node* cmpN = new CmpNNode(in1->in(1), new_in2);
2874        n->subsume_by(cmpN, this);
2875        if (in1->outcnt() == 0) {
2876          in1->disconnect_inputs(NULL, this);
2877        }
2878        if (in2->outcnt() == 0) {
2879          in2->disconnect_inputs(NULL, this);
2880        }
2881      }
2882    }
2883    break;
2884
2885  case Op_DecodeN:
2886  case Op_DecodeNKlass:
2887    assert(!n->in(1)->is_EncodeNarrowPtr(), "should be optimized out");
2888    // DecodeN could be pinned when it can't be fold into
2889    // an address expression, see the code for Op_CastPP above.
2890    assert(n->in(0) == NULL || (UseCompressedOops && !Matcher::narrow_oop_use_complex_address()), "no control");
2891    break;
2892
2893  case Op_EncodeP:
2894  case Op_EncodePKlass: {
2895    Node* in1 = n->in(1);
2896    if (in1->is_DecodeNarrowPtr()) {
2897      n->subsume_by(in1->in(1), this);
2898    } else if (in1->Opcode() == Op_ConP) {
2899      const Type* t = in1->bottom_type();
2900      if (t == TypePtr::NULL_PTR) {
2901        assert(t->isa_oopptr(), "null klass?");
2902        n->subsume_by(ConNode::make(this, TypeNarrowOop::NULL_PTR), this);
2903      } else if (t->isa_oopptr()) {
2904        n->subsume_by(ConNode::make(this, t->make_narrowoop()), this);
2905      } else if (t->isa_klassptr()) {
2906        n->subsume_by(ConNode::make(this, t->make_narrowklass()), this);
2907      }
2908    }
2909    if (in1->outcnt() == 0) {
2910      in1->disconnect_inputs(NULL, this);
2911    }
2912    break;
2913  }
2914
2915  case Op_Proj: {
2916    if (OptimizeStringConcat) {
2917      ProjNode* p = n->as_Proj();
2918      if (p->_is_io_use) {
2919        // Separate projections were used for the exception path which
2920        // are normally removed by a late inline.  If it wasn't inlined
2921        // then they will hang around and should just be replaced with
2922        // the original one.
2923        Node* proj = NULL;
2924        // Replace with just one
2925        for (SimpleDUIterator i(p->in(0)); i.has_next(); i.next()) {
2926          Node *use = i.get();
2927          if (use->is_Proj() && p != use && use->as_Proj()->_con == p->_con) {
2928            proj = use;
2929            break;
2930          }
2931        }
2932        assert(proj != NULL, "must be found");
2933        p->subsume_by(proj, this);
2934      }
2935    }
2936    break;
2937  }
2938
2939  case Op_Phi:
2940    if (n->as_Phi()->bottom_type()->isa_narrowoop() || n->as_Phi()->bottom_type()->isa_narrowklass()) {
2941      // The EncodeP optimization may create Phi with the same edges
2942      // for all paths. It is not handled well by Register Allocator.
2943      Node* unique_in = n->in(1);
2944      assert(unique_in != NULL, "");
2945      uint cnt = n->req();
2946      for (uint i = 2; i < cnt; i++) {
2947        Node* m = n->in(i);
2948        assert(m != NULL, "");
2949        if (unique_in != m)
2950          unique_in = NULL;
2951      }
2952      if (unique_in != NULL) {
2953        n->subsume_by(unique_in, this);
2954      }
2955    }
2956    break;
2957
2958#endif
2959
2960  case Op_ModI:
2961    if (UseDivMod) {
2962      // Check if a%b and a/b both exist
2963      Node* d = n->find_similar(Op_DivI);
2964      if (d) {
2965        // Replace them with a fused divmod if supported
2966        if (Matcher::has_match_rule(Op_DivModI)) {
2967          DivModINode* divmod = DivModINode::make(this, n);
2968          d->subsume_by(divmod->div_proj(), this);
2969          n->subsume_by(divmod->mod_proj(), this);
2970        } else {
2971          // replace a%b with a-((a/b)*b)
2972          Node* mult = new MulINode(d, d->in(2));
2973          Node* sub  = new SubINode(d->in(1), mult);
2974          n->subsume_by(sub, this);
2975        }
2976      }
2977    }
2978    break;
2979
2980  case Op_ModL:
2981    if (UseDivMod) {
2982      // Check if a%b and a/b both exist
2983      Node* d = n->find_similar(Op_DivL);
2984      if (d) {
2985        // Replace them with a fused divmod if supported
2986        if (Matcher::has_match_rule(Op_DivModL)) {
2987          DivModLNode* divmod = DivModLNode::make(this, n);
2988          d->subsume_by(divmod->div_proj(), this);
2989          n->subsume_by(divmod->mod_proj(), this);
2990        } else {
2991          // replace a%b with a-((a/b)*b)
2992          Node* mult = new MulLNode(d, d->in(2));
2993          Node* sub  = new SubLNode(d->in(1), mult);
2994          n->subsume_by(sub, this);
2995        }
2996      }
2997    }
2998    break;
2999
3000  case Op_LoadVector:
3001  case Op_StoreVector:
3002    break;
3003
3004  case Op_PackB:
3005  case Op_PackS:
3006  case Op_PackI:
3007  case Op_PackF:
3008  case Op_PackL:
3009  case Op_PackD:
3010    if (n->req()-1 > 2) {
3011      // Replace many operand PackNodes with a binary tree for matching
3012      PackNode* p = (PackNode*) n;
3013      Node* btp = p->binary_tree_pack(this, 1, n->req());
3014      n->subsume_by(btp, this);
3015    }
3016    break;
3017  case Op_Loop:
3018  case Op_CountedLoop:
3019    if (n->as_Loop()->is_inner_loop()) {
3020      frc.inc_inner_loop_count();
3021    }
3022    break;
3023  case Op_LShiftI:
3024  case Op_RShiftI:
3025  case Op_URShiftI:
3026  case Op_LShiftL:
3027  case Op_RShiftL:
3028  case Op_URShiftL:
3029    if (Matcher::need_masked_shift_count) {
3030      // The cpu's shift instructions don't restrict the count to the
3031      // lower 5/6 bits. We need to do the masking ourselves.
3032      Node* in2 = n->in(2);
3033      juint mask = (n->bottom_type() == TypeInt::INT) ? (BitsPerInt - 1) : (BitsPerLong - 1);
3034      const TypeInt* t = in2->find_int_type();
3035      if (t != NULL && t->is_con()) {
3036        juint shift = t->get_con();
3037        if (shift > mask) { // Unsigned cmp
3038          n->set_req(2, ConNode::make(this, TypeInt::make(shift & mask)));
3039        }
3040      } else {
3041        if (t == NULL || t->_lo < 0 || t->_hi > (int)mask) {
3042          Node* shift = new AndINode(in2, ConNode::make(this, TypeInt::make(mask)));
3043          n->set_req(2, shift);
3044        }
3045      }
3046      if (in2->outcnt() == 0) { // Remove dead node
3047        in2->disconnect_inputs(NULL, this);
3048      }
3049    }
3050    break;
3051  case Op_MemBarStoreStore:
3052  case Op_MemBarRelease:
3053    // Break the link with AllocateNode: it is no longer useful and
3054    // confuses register allocation.
3055    if (n->req() > MemBarNode::Precedent) {
3056      n->set_req(MemBarNode::Precedent, top());
3057    }
3058    break;
3059  default:
3060    assert( !n->is_Call(), "" );
3061    assert( !n->is_Mem(), "" );
3062    break;
3063  }
3064
3065  // Collect CFG split points
3066  if (n->is_MultiBranch())
3067    frc._tests.push(n);
3068}
3069
3070//------------------------------final_graph_reshaping_walk---------------------
3071// Replacing Opaque nodes with their input in final_graph_reshaping_impl(),
3072// requires that the walk visits a node's inputs before visiting the node.
3073void Compile::final_graph_reshaping_walk( Node_Stack &nstack, Node *root, Final_Reshape_Counts &frc ) {
3074  ResourceArea *area = Thread::current()->resource_area();
3075  Unique_Node_List sfpt(area);
3076
3077  frc._visited.set(root->_idx); // first, mark node as visited
3078  uint cnt = root->req();
3079  Node *n = root;
3080  uint  i = 0;
3081  while (true) {
3082    if (i < cnt) {
3083      // Place all non-visited non-null inputs onto stack
3084      Node* m = n->in(i);
3085      ++i;
3086      if (m != NULL && !frc._visited.test_set(m->_idx)) {
3087        if (m->is_SafePoint() && m->as_SafePoint()->jvms() != NULL) {
3088          // compute worst case interpreter size in case of a deoptimization
3089          update_interpreter_frame_size(m->as_SafePoint()->jvms()->interpreter_frame_size());
3090
3091          sfpt.push(m);
3092        }
3093        cnt = m->req();
3094        nstack.push(n, i); // put on stack parent and next input's index
3095        n = m;
3096        i = 0;
3097      }
3098    } else {
3099      // Now do post-visit work
3100      final_graph_reshaping_impl( n, frc );
3101      if (nstack.is_empty())
3102        break;             // finished
3103      n = nstack.node();   // Get node from stack
3104      cnt = n->req();
3105      i = nstack.index();
3106      nstack.pop();        // Shift to the next node on stack
3107    }
3108  }
3109
3110  // Skip next transformation if compressed oops are not used.
3111  if ((UseCompressedOops && !Matcher::gen_narrow_oop_implicit_null_checks()) ||
3112      (!UseCompressedOops && !UseCompressedClassPointers))
3113    return;
3114
3115  // Go over safepoints nodes to skip DecodeN/DecodeNKlass nodes for debug edges.
3116  // It could be done for an uncommon traps or any safepoints/calls
3117  // if the DecodeN/DecodeNKlass node is referenced only in a debug info.
3118  while (sfpt.size() > 0) {
3119    n = sfpt.pop();
3120    JVMState *jvms = n->as_SafePoint()->jvms();
3121    assert(jvms != NULL, "sanity");
3122    int start = jvms->debug_start();
3123    int end   = n->req();
3124    bool is_uncommon = (n->is_CallStaticJava() &&
3125                        n->as_CallStaticJava()->uncommon_trap_request() != 0);
3126    for (int j = start; j < end; j++) {
3127      Node* in = n->in(j);
3128      if (in->is_DecodeNarrowPtr()) {
3129        bool safe_to_skip = true;
3130        if (!is_uncommon ) {
3131          // Is it safe to skip?
3132          for (uint i = 0; i < in->outcnt(); i++) {
3133            Node* u = in->raw_out(i);
3134            if (!u->is_SafePoint() ||
3135                 u->is_Call() && u->as_Call()->has_non_debug_use(n)) {
3136              safe_to_skip = false;
3137            }
3138          }
3139        }
3140        if (safe_to_skip) {
3141          n->set_req(j, in->in(1));
3142        }
3143        if (in->outcnt() == 0) {
3144          in->disconnect_inputs(NULL, this);
3145        }
3146      }
3147    }
3148  }
3149}
3150
3151//------------------------------final_graph_reshaping--------------------------
3152// Final Graph Reshaping.
3153//
3154// (1) Clone simple inputs to uncommon calls, so they can be scheduled late
3155//     and not commoned up and forced early.  Must come after regular
3156//     optimizations to avoid GVN undoing the cloning.  Clone constant
3157//     inputs to Loop Phis; these will be split by the allocator anyways.
3158//     Remove Opaque nodes.
3159// (2) Move last-uses by commutative operations to the left input to encourage
3160//     Intel update-in-place two-address operations and better register usage
3161//     on RISCs.  Must come after regular optimizations to avoid GVN Ideal
3162//     calls canonicalizing them back.
3163// (3) Count the number of double-precision FP ops, single-precision FP ops
3164//     and call sites.  On Intel, we can get correct rounding either by
3165//     forcing singles to memory (requires extra stores and loads after each
3166//     FP bytecode) or we can set a rounding mode bit (requires setting and
3167//     clearing the mode bit around call sites).  The mode bit is only used
3168//     if the relative frequency of single FP ops to calls is low enough.
3169//     This is a key transform for SPEC mpeg_audio.
3170// (4) Detect infinite loops; blobs of code reachable from above but not
3171//     below.  Several of the Code_Gen algorithms fail on such code shapes,
3172//     so we simply bail out.  Happens a lot in ZKM.jar, but also happens
3173//     from time to time in other codes (such as -Xcomp finalizer loops, etc).
3174//     Detection is by looking for IfNodes where only 1 projection is
3175//     reachable from below or CatchNodes missing some targets.
3176// (5) Assert for insane oop offsets in debug mode.
3177
3178bool Compile::final_graph_reshaping() {
3179  // an infinite loop may have been eliminated by the optimizer,
3180  // in which case the graph will be empty.
3181  if (root()->req() == 1) {
3182    record_method_not_compilable("trivial infinite loop");
3183    return true;
3184  }
3185
3186  // Expensive nodes have their control input set to prevent the GVN
3187  // from freely commoning them. There's no GVN beyond this point so
3188  // no need to keep the control input. We want the expensive nodes to
3189  // be freely moved to the least frequent code path by gcm.
3190  assert(OptimizeExpensiveOps || expensive_count() == 0, "optimization off but list non empty?");
3191  for (int i = 0; i < expensive_count(); i++) {
3192    _expensive_nodes->at(i)->set_req(0, NULL);
3193  }
3194
3195  Final_Reshape_Counts frc;
3196
3197  // Visit everybody reachable!
3198  // Allocate stack of size C->unique()/2 to avoid frequent realloc
3199  Node_Stack nstack(unique() >> 1);
3200  final_graph_reshaping_walk(nstack, root(), frc);
3201
3202  // Check for unreachable (from below) code (i.e., infinite loops).
3203  for( uint i = 0; i < frc._tests.size(); i++ ) {
3204    MultiBranchNode *n = frc._tests[i]->as_MultiBranch();
3205    // Get number of CFG targets.
3206    // Note that PCTables include exception targets after calls.
3207    uint required_outcnt = n->required_outcnt();
3208    if (n->outcnt() != required_outcnt) {
3209      // Check for a few special cases.  Rethrow Nodes never take the
3210      // 'fall-thru' path, so expected kids is 1 less.
3211      if (n->is_PCTable() && n->in(0) && n->in(0)->in(0)) {
3212        if (n->in(0)->in(0)->is_Call()) {
3213          CallNode *call = n->in(0)->in(0)->as_Call();
3214          if (call->entry_point() == OptoRuntime::rethrow_stub()) {
3215            required_outcnt--;      // Rethrow always has 1 less kid
3216          } else if (call->req() > TypeFunc::Parms &&
3217                     call->is_CallDynamicJava()) {
3218            // Check for null receiver. In such case, the optimizer has
3219            // detected that the virtual call will always result in a null
3220            // pointer exception. The fall-through projection of this CatchNode
3221            // will not be populated.
3222            Node *arg0 = call->in(TypeFunc::Parms);
3223            if (arg0->is_Type() &&
3224                arg0->as_Type()->type()->higher_equal(TypePtr::NULL_PTR)) {
3225              required_outcnt--;
3226            }
3227          } else if (call->entry_point() == OptoRuntime::new_array_Java() &&
3228                     call->req() > TypeFunc::Parms+1 &&
3229                     call->is_CallStaticJava()) {
3230            // Check for negative array length. In such case, the optimizer has
3231            // detected that the allocation attempt will always result in an
3232            // exception. There is no fall-through projection of this CatchNode .
3233            Node *arg1 = call->in(TypeFunc::Parms+1);
3234            if (arg1->is_Type() &&
3235                arg1->as_Type()->type()->join(TypeInt::POS)->empty()) {
3236              required_outcnt--;
3237            }
3238          }
3239        }
3240      }
3241      // Recheck with a better notion of 'required_outcnt'
3242      if (n->outcnt() != required_outcnt) {
3243        record_method_not_compilable("malformed control flow");
3244        return true;            // Not all targets reachable!
3245      }
3246    }
3247    // Check that I actually visited all kids.  Unreached kids
3248    // must be infinite loops.
3249    for (DUIterator_Fast jmax, j = n->fast_outs(jmax); j < jmax; j++)
3250      if (!frc._visited.test(n->fast_out(j)->_idx)) {
3251        record_method_not_compilable("infinite loop");
3252        return true;            // Found unvisited kid; must be unreach
3253      }
3254  }
3255
3256  // If original bytecodes contained a mixture of floats and doubles
3257  // check if the optimizer has made it homogenous, item (3).
3258  if( Use24BitFPMode && Use24BitFP && UseSSE == 0 &&
3259      frc.get_float_count() > 32 &&
3260      frc.get_double_count() == 0 &&
3261      (10 * frc.get_call_count() < frc.get_float_count()) ) {
3262    set_24_bit_selection_and_mode( false,  true );
3263  }
3264
3265  set_java_calls(frc.get_java_call_count());
3266  set_inner_loops(frc.get_inner_loop_count());
3267
3268  // No infinite loops, no reason to bail out.
3269  return false;
3270}
3271
3272//-----------------------------too_many_traps----------------------------------
3273// Report if there are too many traps at the current method and bci.
3274// Return true if there was a trap, and/or PerMethodTrapLimit is exceeded.
3275bool Compile::too_many_traps(ciMethod* method,
3276                             int bci,
3277                             Deoptimization::DeoptReason reason) {
3278  ciMethodData* md = method->method_data();
3279  if (md->is_empty()) {
3280    // Assume the trap has not occurred, or that it occurred only
3281    // because of a transient condition during start-up in the interpreter.
3282    return false;
3283  }
3284  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
3285  if (md->has_trap_at(bci, m, reason) != 0) {
3286    // Assume PerBytecodeTrapLimit==0, for a more conservative heuristic.
3287    // Also, if there are multiple reasons, or if there is no per-BCI record,
3288    // assume the worst.
3289    if (log())
3290      log()->elem("observe trap='%s' count='%d'",
3291                  Deoptimization::trap_reason_name(reason),
3292                  md->trap_count(reason));
3293    return true;
3294  } else {
3295    // Ignore method/bci and see if there have been too many globally.
3296    return too_many_traps(reason, md);
3297  }
3298}
3299
3300// Less-accurate variant which does not require a method and bci.
3301bool Compile::too_many_traps(Deoptimization::DeoptReason reason,
3302                             ciMethodData* logmd) {
3303  if (trap_count(reason) >= Deoptimization::per_method_trap_limit(reason)) {
3304    // Too many traps globally.
3305    // Note that we use cumulative trap_count, not just md->trap_count.
3306    if (log()) {
3307      int mcount = (logmd == NULL)? -1: (int)logmd->trap_count(reason);
3308      log()->elem("observe trap='%s' count='0' mcount='%d' ccount='%d'",
3309                  Deoptimization::trap_reason_name(reason),
3310                  mcount, trap_count(reason));
3311    }
3312    return true;
3313  } else {
3314    // The coast is clear.
3315    return false;
3316  }
3317}
3318
3319//--------------------------too_many_recompiles--------------------------------
3320// Report if there are too many recompiles at the current method and bci.
3321// Consults PerBytecodeRecompilationCutoff and PerMethodRecompilationCutoff.
3322// Is not eager to return true, since this will cause the compiler to use
3323// Action_none for a trap point, to avoid too many recompilations.
3324bool Compile::too_many_recompiles(ciMethod* method,
3325                                  int bci,
3326                                  Deoptimization::DeoptReason reason) {
3327  ciMethodData* md = method->method_data();
3328  if (md->is_empty()) {
3329    // Assume the trap has not occurred, or that it occurred only
3330    // because of a transient condition during start-up in the interpreter.
3331    return false;
3332  }
3333  // Pick a cutoff point well within PerBytecodeRecompilationCutoff.
3334  uint bc_cutoff = (uint) PerBytecodeRecompilationCutoff / 8;
3335  uint m_cutoff  = (uint) PerMethodRecompilationCutoff / 2 + 1;  // not zero
3336  Deoptimization::DeoptReason per_bc_reason
3337    = Deoptimization::reason_recorded_per_bytecode_if_any(reason);
3338  ciMethod* m = Deoptimization::reason_is_speculate(reason) ? this->method() : NULL;
3339  if ((per_bc_reason == Deoptimization::Reason_none
3340       || md->has_trap_at(bci, m, reason) != 0)
3341      // The trap frequency measure we care about is the recompile count:
3342      && md->trap_recompiled_at(bci, m)
3343      && md->overflow_recompile_count() >= bc_cutoff) {
3344    // Do not emit a trap here if it has already caused recompilations.
3345    // Also, if there are multiple reasons, or if there is no per-BCI record,
3346    // assume the worst.
3347    if (log())
3348      log()->elem("observe trap='%s recompiled' count='%d' recompiles2='%d'",
3349                  Deoptimization::trap_reason_name(reason),
3350                  md->trap_count(reason),
3351                  md->overflow_recompile_count());
3352    return true;
3353  } else if (trap_count(reason) != 0
3354             && decompile_count() >= m_cutoff) {
3355    // Too many recompiles globally, and we have seen this sort of trap.
3356    // Use cumulative decompile_count, not just md->decompile_count.
3357    if (log())
3358      log()->elem("observe trap='%s' count='%d' mcount='%d' decompiles='%d' mdecompiles='%d'",
3359                  Deoptimization::trap_reason_name(reason),
3360                  md->trap_count(reason), trap_count(reason),
3361                  md->decompile_count(), decompile_count());
3362    return true;
3363  } else {
3364    // The coast is clear.
3365    return false;
3366  }
3367}
3368
3369// Compute when not to trap. Used by matching trap based nodes and
3370// NullCheck optimization.
3371void Compile::set_allowed_deopt_reasons() {
3372  _allowed_reasons = 0;
3373  if (is_method_compilation()) {
3374    for (int rs = (int)Deoptimization::Reason_none+1; rs < Compile::trapHistLength; rs++) {
3375      assert(rs < BitsPerInt, "recode bit map");
3376      if (!too_many_traps((Deoptimization::DeoptReason) rs)) {
3377        _allowed_reasons |= nth_bit(rs);
3378      }
3379    }
3380  }
3381}
3382
3383#ifndef PRODUCT
3384//------------------------------verify_graph_edges---------------------------
3385// Walk the Graph and verify that there is a one-to-one correspondence
3386// between Use-Def edges and Def-Use edges in the graph.
3387void Compile::verify_graph_edges(bool no_dead_code) {
3388  if (VerifyGraphEdges) {
3389    ResourceArea *area = Thread::current()->resource_area();
3390    Unique_Node_List visited(area);
3391    // Call recursive graph walk to check edges
3392    _root->verify_edges(visited);
3393    if (no_dead_code) {
3394      // Now make sure that no visited node is used by an unvisited node.
3395      bool dead_nodes = false;
3396      Unique_Node_List checked(area);
3397      while (visited.size() > 0) {
3398        Node* n = visited.pop();
3399        checked.push(n);
3400        for (uint i = 0; i < n->outcnt(); i++) {
3401          Node* use = n->raw_out(i);
3402          if (checked.member(use))  continue;  // already checked
3403          if (visited.member(use))  continue;  // already in the graph
3404          if (use->is_Con())        continue;  // a dead ConNode is OK
3405          // At this point, we have found a dead node which is DU-reachable.
3406          if (!dead_nodes) {
3407            tty->print_cr("*** Dead nodes reachable via DU edges:");
3408            dead_nodes = true;
3409          }
3410          use->dump(2);
3411          tty->print_cr("---");
3412          checked.push(use);  // No repeats; pretend it is now checked.
3413        }
3414      }
3415      assert(!dead_nodes, "using nodes must be reachable from root");
3416    }
3417  }
3418}
3419
3420// Verify GC barriers consistency
3421// Currently supported:
3422// - G1 pre-barriers (see GraphKit::g1_write_barrier_pre())
3423void Compile::verify_barriers() {
3424  if (UseG1GC) {
3425    // Verify G1 pre-barriers
3426    const int marking_offset = in_bytes(JavaThread::satb_mark_queue_offset() + PtrQueue::byte_offset_of_active());
3427
3428    ResourceArea *area = Thread::current()->resource_area();
3429    Unique_Node_List visited(area);
3430    Node_List worklist(area);
3431    // We're going to walk control flow backwards starting from the Root
3432    worklist.push(_root);
3433    while (worklist.size() > 0) {
3434      Node* x = worklist.pop();
3435      if (x == NULL || x == top()) continue;
3436      if (visited.member(x)) {
3437        continue;
3438      } else {
3439        visited.push(x);
3440      }
3441
3442      if (x->is_Region()) {
3443        for (uint i = 1; i < x->req(); i++) {
3444          worklist.push(x->in(i));
3445        }
3446      } else {
3447        worklist.push(x->in(0));
3448        // We are looking for the pattern:
3449        //                            /->ThreadLocal
3450        // If->Bool->CmpI->LoadB->AddP->ConL(marking_offset)
3451        //              \->ConI(0)
3452        // We want to verify that the If and the LoadB have the same control
3453        // See GraphKit::g1_write_barrier_pre()
3454        if (x->is_If()) {
3455          IfNode *iff = x->as_If();
3456          if (iff->in(1)->is_Bool() && iff->in(1)->in(1)->is_Cmp()) {
3457            CmpNode *cmp = iff->in(1)->in(1)->as_Cmp();
3458            if (cmp->Opcode() == Op_CmpI && cmp->in(2)->is_Con() && cmp->in(2)->bottom_type()->is_int()->get_con() == 0
3459                && cmp->in(1)->is_Load()) {
3460              LoadNode* load = cmp->in(1)->as_Load();
3461              if (load->Opcode() == Op_LoadB && load->in(2)->is_AddP() && load->in(2)->in(2)->Opcode() == Op_ThreadLocal
3462                  && load->in(2)->in(3)->is_Con()
3463                  && load->in(2)->in(3)->bottom_type()->is_intptr_t()->get_con() == marking_offset) {
3464
3465                Node* if_ctrl = iff->in(0);
3466                Node* load_ctrl = load->in(0);
3467
3468                if (if_ctrl != load_ctrl) {
3469                  // Skip possible CProj->NeverBranch in infinite loops
3470                  if ((if_ctrl->is_Proj() && if_ctrl->Opcode() == Op_CProj)
3471                      && (if_ctrl->in(0)->is_MultiBranch() && if_ctrl->in(0)->Opcode() == Op_NeverBranch)) {
3472                    if_ctrl = if_ctrl->in(0)->in(0);
3473                  }
3474                }
3475                assert(load_ctrl != NULL && if_ctrl == load_ctrl, "controls must match");
3476              }
3477            }
3478          }
3479        }
3480      }
3481    }
3482  }
3483}
3484
3485#endif
3486
3487// The Compile object keeps track of failure reasons separately from the ciEnv.
3488// This is required because there is not quite a 1-1 relation between the
3489// ciEnv and its compilation task and the Compile object.  Note that one
3490// ciEnv might use two Compile objects, if C2Compiler::compile_method decides
3491// to backtrack and retry without subsuming loads.  Other than this backtracking
3492// behavior, the Compile's failure reason is quietly copied up to the ciEnv
3493// by the logic in C2Compiler.
3494void Compile::record_failure(const char* reason) {
3495  if (log() != NULL) {
3496    log()->elem("failure reason='%s' phase='compile'", reason);
3497  }
3498  if (_failure_reason == NULL) {
3499    // Record the first failure reason.
3500    _failure_reason = reason;
3501  }
3502
3503  EventCompilerFailure event;
3504  if (event.should_commit()) {
3505    event.set_compileID(Compile::compile_id());
3506    event.set_failure(reason);
3507    event.commit();
3508  }
3509
3510  if (!C->failure_reason_is(C2Compiler::retry_no_subsuming_loads())) {
3511    C->print_method(PHASE_FAILURE);
3512  }
3513  _root = NULL;  // flush the graph, too
3514}
3515
3516Compile::TracePhase::TracePhase(const char* name, elapsedTimer* accumulator, bool dolog)
3517  : TraceTime(NULL, accumulator, false NOT_PRODUCT( || TimeCompiler ), false),
3518    _phase_name(name), _dolog(dolog)
3519{
3520  if (dolog) {
3521    C = Compile::current();
3522    _log = C->log();
3523  } else {
3524    C = NULL;
3525    _log = NULL;
3526  }
3527  if (_log != NULL) {
3528    _log->begin_head("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
3529    _log->stamp();
3530    _log->end_head();
3531  }
3532}
3533
3534Compile::TracePhase::~TracePhase() {
3535
3536  C = Compile::current();
3537  if (_dolog) {
3538    _log = C->log();
3539  } else {
3540    _log = NULL;
3541  }
3542
3543#ifdef ASSERT
3544  if (PrintIdealNodeCount) {
3545    tty->print_cr("phase name='%s' nodes='%d' live='%d' live_graph_walk='%d'",
3546                  _phase_name, C->unique(), C->live_nodes(), C->count_live_nodes_by_graph_walk());
3547  }
3548
3549  if (VerifyIdealNodeCount) {
3550    Compile::current()->print_missing_nodes();
3551  }
3552#endif
3553
3554  if (_log != NULL) {
3555    _log->done("phase name='%s' nodes='%d' live='%d'", _phase_name, C->unique(), C->live_nodes());
3556  }
3557}
3558
3559//=============================================================================
3560// Two Constant's are equal when the type and the value are equal.
3561bool Compile::Constant::operator==(const Constant& other) {
3562  if (type()          != other.type()         )  return false;
3563  if (can_be_reused() != other.can_be_reused())  return false;
3564  // For floating point values we compare the bit pattern.
3565  switch (type()) {
3566  case T_FLOAT:   return (_v._value.i == other._v._value.i);
3567  case T_LONG:
3568  case T_DOUBLE:  return (_v._value.j == other._v._value.j);
3569  case T_OBJECT:
3570  case T_ADDRESS: return (_v._value.l == other._v._value.l);
3571  case T_VOID:    return (_v._value.l == other._v._value.l);  // jump-table entries
3572  case T_METADATA: return (_v._metadata == other._v._metadata);
3573  default: ShouldNotReachHere();
3574  }
3575  return false;
3576}
3577
3578static int type_to_size_in_bytes(BasicType t) {
3579  switch (t) {
3580  case T_LONG:    return sizeof(jlong  );
3581  case T_FLOAT:   return sizeof(jfloat );
3582  case T_DOUBLE:  return sizeof(jdouble);
3583  case T_METADATA: return sizeof(Metadata*);
3584    // We use T_VOID as marker for jump-table entries (labels) which
3585    // need an internal word relocation.
3586  case T_VOID:
3587  case T_ADDRESS:
3588  case T_OBJECT:  return sizeof(jobject);
3589  }
3590
3591  ShouldNotReachHere();
3592  return -1;
3593}
3594
3595int Compile::ConstantTable::qsort_comparator(Constant* a, Constant* b) {
3596  // sort descending
3597  if (a->freq() > b->freq())  return -1;
3598  if (a->freq() < b->freq())  return  1;
3599  return 0;
3600}
3601
3602void Compile::ConstantTable::calculate_offsets_and_size() {
3603  // First, sort the array by frequencies.
3604  _constants.sort(qsort_comparator);
3605
3606#ifdef ASSERT
3607  // Make sure all jump-table entries were sorted to the end of the
3608  // array (they have a negative frequency).
3609  bool found_void = false;
3610  for (int i = 0; i < _constants.length(); i++) {
3611    Constant con = _constants.at(i);
3612    if (con.type() == T_VOID)
3613      found_void = true;  // jump-tables
3614    else
3615      assert(!found_void, "wrong sorting");
3616  }
3617#endif
3618
3619  int offset = 0;
3620  for (int i = 0; i < _constants.length(); i++) {
3621    Constant* con = _constants.adr_at(i);
3622
3623    // Align offset for type.
3624    int typesize = type_to_size_in_bytes(con->type());
3625    offset = align_size_up(offset, typesize);
3626    con->set_offset(offset);   // set constant's offset
3627
3628    if (con->type() == T_VOID) {
3629      MachConstantNode* n = (MachConstantNode*) con->get_jobject();
3630      offset = offset + typesize * n->outcnt();  // expand jump-table
3631    } else {
3632      offset = offset + typesize;
3633    }
3634  }
3635
3636  // Align size up to the next section start (which is insts; see
3637  // CodeBuffer::align_at_start).
3638  assert(_size == -1, "already set?");
3639  _size = align_size_up(offset, CodeEntryAlignment);
3640}
3641
3642void Compile::ConstantTable::emit(CodeBuffer& cb) {
3643  MacroAssembler _masm(&cb);
3644  for (int i = 0; i < _constants.length(); i++) {
3645    Constant con = _constants.at(i);
3646    address constant_addr;
3647    switch (con.type()) {
3648    case T_LONG:   constant_addr = _masm.long_constant(  con.get_jlong()  ); break;
3649    case T_FLOAT:  constant_addr = _masm.float_constant( con.get_jfloat() ); break;
3650    case T_DOUBLE: constant_addr = _masm.double_constant(con.get_jdouble()); break;
3651    case T_OBJECT: {
3652      jobject obj = con.get_jobject();
3653      int oop_index = _masm.oop_recorder()->find_index(obj);
3654      constant_addr = _masm.address_constant((address) obj, oop_Relocation::spec(oop_index));
3655      break;
3656    }
3657    case T_ADDRESS: {
3658      address addr = (address) con.get_jobject();
3659      constant_addr = _masm.address_constant(addr);
3660      break;
3661    }
3662    // We use T_VOID as marker for jump-table entries (labels) which
3663    // need an internal word relocation.
3664    case T_VOID: {
3665      MachConstantNode* n = (MachConstantNode*) con.get_jobject();
3666      // Fill the jump-table with a dummy word.  The real value is
3667      // filled in later in fill_jump_table.
3668      address dummy = (address) n;
3669      constant_addr = _masm.address_constant(dummy);
3670      // Expand jump-table
3671      for (uint i = 1; i < n->outcnt(); i++) {
3672        address temp_addr = _masm.address_constant(dummy + i);
3673        assert(temp_addr, "consts section too small");
3674      }
3675      break;
3676    }
3677    case T_METADATA: {
3678      Metadata* obj = con.get_metadata();
3679      int metadata_index = _masm.oop_recorder()->find_index(obj);
3680      constant_addr = _masm.address_constant((address) obj, metadata_Relocation::spec(metadata_index));
3681      break;
3682    }
3683    default: ShouldNotReachHere();
3684    }
3685    assert(constant_addr, "consts section too small");
3686    assert((constant_addr - _masm.code()->consts()->start()) == con.offset(),
3687            err_msg_res("must be: %d == %d", (int) (constant_addr - _masm.code()->consts()->start()), (int)(con.offset())));
3688  }
3689}
3690
3691int Compile::ConstantTable::find_offset(Constant& con) const {
3692  int idx = _constants.find(con);
3693  assert(idx != -1, "constant must be in constant table");
3694  int offset = _constants.at(idx).offset();
3695  assert(offset != -1, "constant table not emitted yet?");
3696  return offset;
3697}
3698
3699void Compile::ConstantTable::add(Constant& con) {
3700  if (con.can_be_reused()) {
3701    int idx = _constants.find(con);
3702    if (idx != -1 && _constants.at(idx).can_be_reused()) {
3703      _constants.adr_at(idx)->inc_freq(con.freq());  // increase the frequency by the current value
3704      return;
3705    }
3706  }
3707  (void) _constants.append(con);
3708}
3709
3710Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, BasicType type, jvalue value) {
3711  Block* b = Compile::current()->cfg()->get_block_for_node(n);
3712  Constant con(type, value, b->_freq);
3713  add(con);
3714  return con;
3715}
3716
3717Compile::Constant Compile::ConstantTable::add(Metadata* metadata) {
3718  Constant con(metadata);
3719  add(con);
3720  return con;
3721}
3722
3723Compile::Constant Compile::ConstantTable::add(MachConstantNode* n, MachOper* oper) {
3724  jvalue value;
3725  BasicType type = oper->type()->basic_type();
3726  switch (type) {
3727  case T_LONG:    value.j = oper->constantL(); break;
3728  case T_FLOAT:   value.f = oper->constantF(); break;
3729  case T_DOUBLE:  value.d = oper->constantD(); break;
3730  case T_OBJECT:
3731  case T_ADDRESS: value.l = (jobject) oper->constant(); break;
3732  case T_METADATA: return add((Metadata*)oper->constant()); break;
3733  default: guarantee(false, err_msg_res("unhandled type: %s", type2name(type)));
3734  }
3735  return add(n, type, value);
3736}
3737
3738Compile::Constant Compile::ConstantTable::add_jump_table(MachConstantNode* n) {
3739  jvalue value;
3740  // We can use the node pointer here to identify the right jump-table
3741  // as this method is called from Compile::Fill_buffer right before
3742  // the MachNodes are emitted and the jump-table is filled (means the
3743  // MachNode pointers do not change anymore).
3744  value.l = (jobject) n;
3745  Constant con(T_VOID, value, next_jump_table_freq(), false);  // Labels of a jump-table cannot be reused.
3746  add(con);
3747  return con;
3748}
3749
3750void Compile::ConstantTable::fill_jump_table(CodeBuffer& cb, MachConstantNode* n, GrowableArray<Label*> labels) const {
3751  // If called from Compile::scratch_emit_size do nothing.
3752  if (Compile::current()->in_scratch_emit_size())  return;
3753
3754  assert(labels.is_nonempty(), "must be");
3755  assert((uint) labels.length() == n->outcnt(), err_msg_res("must be equal: %d == %d", labels.length(), n->outcnt()));
3756
3757  // Since MachConstantNode::constant_offset() also contains
3758  // table_base_offset() we need to subtract the table_base_offset()
3759  // to get the plain offset into the constant table.
3760  int offset = n->constant_offset() - table_base_offset();
3761
3762  MacroAssembler _masm(&cb);
3763  address* jump_table_base = (address*) (_masm.code()->consts()->start() + offset);
3764
3765  for (uint i = 0; i < n->outcnt(); i++) {
3766    address* constant_addr = &jump_table_base[i];
3767    assert(*constant_addr == (((address) n) + i), err_msg_res("all jump-table entries must contain adjusted node pointer: " INTPTR_FORMAT " == " INTPTR_FORMAT, p2i(*constant_addr), p2i(((address) n) + i)));
3768    *constant_addr = cb.consts()->target(*labels.at(i), (address) constant_addr);
3769    cb.consts()->relocate((address) constant_addr, relocInfo::internal_word_type);
3770  }
3771}
3772
3773// The message about the current inlining is accumulated in
3774// _print_inlining_stream and transfered into the _print_inlining_list
3775// once we know whether inlining succeeds or not. For regular
3776// inlining, messages are appended to the buffer pointed by
3777// _print_inlining_idx in the _print_inlining_list. For late inlining,
3778// a new buffer is added after _print_inlining_idx in the list. This
3779// way we can update the inlining message for late inlining call site
3780// when the inlining is attempted again.
3781void Compile::print_inlining_init() {
3782  if (print_inlining() || print_intrinsics()) {
3783    _print_inlining_stream = new stringStream();
3784    _print_inlining_list = new (comp_arena())GrowableArray<PrintInliningBuffer>(comp_arena(), 1, 1, PrintInliningBuffer());
3785  }
3786}
3787
3788void Compile::print_inlining_reinit() {
3789  if (print_inlining() || print_intrinsics()) {
3790    // Re allocate buffer when we change ResourceMark
3791    _print_inlining_stream = new stringStream();
3792  }
3793}
3794
3795void Compile::print_inlining_reset() {
3796  _print_inlining_stream->reset();
3797}
3798
3799void Compile::print_inlining_commit() {
3800  assert(print_inlining() || print_intrinsics(), "PrintInlining off?");
3801  // Transfer the message from _print_inlining_stream to the current
3802  // _print_inlining_list buffer and clear _print_inlining_stream.
3803  _print_inlining_list->at(_print_inlining_idx).ss()->write(_print_inlining_stream->as_string(), _print_inlining_stream->size());
3804  print_inlining_reset();
3805}
3806
3807void Compile::print_inlining_push() {
3808  // Add new buffer to the _print_inlining_list at current position
3809  _print_inlining_idx++;
3810  _print_inlining_list->insert_before(_print_inlining_idx, PrintInliningBuffer());
3811}
3812
3813Compile::PrintInliningBuffer& Compile::print_inlining_current() {
3814  return _print_inlining_list->at(_print_inlining_idx);
3815}
3816
3817void Compile::print_inlining_update(CallGenerator* cg) {
3818  if (print_inlining() || print_intrinsics()) {
3819    if (!cg->is_late_inline()) {
3820      if (print_inlining_current().cg() != NULL) {
3821        print_inlining_push();
3822      }
3823      print_inlining_commit();
3824    } else {
3825      if (print_inlining_current().cg() != cg &&
3826          (print_inlining_current().cg() != NULL ||
3827           print_inlining_current().ss()->size() != 0)) {
3828        print_inlining_push();
3829      }
3830      print_inlining_commit();
3831      print_inlining_current().set_cg(cg);
3832    }
3833  }
3834}
3835
3836void Compile::print_inlining_move_to(CallGenerator* cg) {
3837  // We resume inlining at a late inlining call site. Locate the
3838  // corresponding inlining buffer so that we can update it.
3839  if (print_inlining()) {
3840    for (int i = 0; i < _print_inlining_list->length(); i++) {
3841      if (_print_inlining_list->adr_at(i)->cg() == cg) {
3842        _print_inlining_idx = i;
3843        return;
3844      }
3845    }
3846    ShouldNotReachHere();
3847  }
3848}
3849
3850void Compile::print_inlining_update_delayed(CallGenerator* cg) {
3851  if (print_inlining()) {
3852    assert(_print_inlining_stream->size() > 0, "missing inlining msg");
3853    assert(print_inlining_current().cg() == cg, "wrong entry");
3854    // replace message with new message
3855    _print_inlining_list->at_put(_print_inlining_idx, PrintInliningBuffer());
3856    print_inlining_commit();
3857    print_inlining_current().set_cg(cg);
3858  }
3859}
3860
3861void Compile::print_inlining_assert_ready() {
3862  assert(!_print_inlining || _print_inlining_stream->size() == 0, "loosing data");
3863}
3864
3865void Compile::process_print_inlining() {
3866  bool do_print_inlining = print_inlining() || print_intrinsics();
3867  if (do_print_inlining || log() != NULL) {
3868    // Print inlining message for candidates that we couldn't inline
3869    // for lack of space
3870    for (int i = 0; i < _late_inlines.length(); i++) {
3871      CallGenerator* cg = _late_inlines.at(i);
3872      if (!cg->is_mh_late_inline()) {
3873        const char* msg = "live nodes > LiveNodeCountInliningCutoff";
3874        if (do_print_inlining) {
3875          cg->print_inlining_late(msg);
3876        }
3877        log_late_inline_failure(cg, msg);
3878      }
3879    }
3880  }
3881  if (do_print_inlining) {
3882    ResourceMark rm;
3883    stringStream ss;
3884    for (int i = 0; i < _print_inlining_list->length(); i++) {
3885      ss.print("%s", _print_inlining_list->adr_at(i)->ss()->as_string());
3886    }
3887    size_t end = ss.size();
3888    _print_inlining_output = NEW_ARENA_ARRAY(comp_arena(), char, end+1);
3889    strncpy(_print_inlining_output, ss.base(), end+1);
3890    _print_inlining_output[end] = 0;
3891  }
3892}
3893
3894void Compile::dump_print_inlining() {
3895  if (_print_inlining_output != NULL) {
3896    tty->print_raw(_print_inlining_output);
3897  }
3898}
3899
3900void Compile::log_late_inline(CallGenerator* cg) {
3901  if (log() != NULL) {
3902    log()->head("late_inline method='%d'  inline_id='" JLONG_FORMAT "'", log()->identify(cg->method()),
3903                cg->unique_id());
3904    JVMState* p = cg->call_node()->jvms();
3905    while (p != NULL) {
3906      log()->elem("jvms bci='%d' method='%d'", p->bci(), log()->identify(p->method()));
3907      p = p->caller();
3908    }
3909    log()->tail("late_inline");
3910  }
3911}
3912
3913void Compile::log_late_inline_failure(CallGenerator* cg, const char* msg) {
3914  log_late_inline(cg);
3915  if (log() != NULL) {
3916    log()->inline_fail(msg);
3917  }
3918}
3919
3920void Compile::log_inline_id(CallGenerator* cg) {
3921  if (log() != NULL) {
3922    // The LogCompilation tool needs a unique way to identify late
3923    // inline call sites. This id must be unique for this call site in
3924    // this compilation. Try to have it unique across compilations as
3925    // well because it can be convenient when grepping through the log
3926    // file.
3927    // Distinguish OSR compilations from others in case CICountOSR is
3928    // on.
3929    jlong id = ((jlong)unique()) + (((jlong)compile_id()) << 33) + (CICountOSR && is_osr_compilation() ? ((jlong)1) << 32 : 0);
3930    cg->set_unique_id(id);
3931    log()->elem("inline_id id='" JLONG_FORMAT "'", id);
3932  }
3933}
3934
3935void Compile::log_inline_failure(const char* msg) {
3936  if (C->log() != NULL) {
3937    C->log()->inline_fail(msg);
3938  }
3939}
3940
3941
3942// Dump inlining replay data to the stream.
3943// Don't change thread state and acquire any locks.
3944void Compile::dump_inline_data(outputStream* out) {
3945  InlineTree* inl_tree = ilt();
3946  if (inl_tree != NULL) {
3947    out->print(" inline %d", inl_tree->count());
3948    inl_tree->dump_replay_data(out);
3949  }
3950}
3951
3952int Compile::cmp_expensive_nodes(Node* n1, Node* n2) {
3953  if (n1->Opcode() < n2->Opcode())      return -1;
3954  else if (n1->Opcode() > n2->Opcode()) return 1;
3955
3956  assert(n1->req() == n2->req(), err_msg_res("can't compare %s nodes: n1->req() = %d, n2->req() = %d", NodeClassNames[n1->Opcode()], n1->req(), n2->req()));
3957  for (uint i = 1; i < n1->req(); i++) {
3958    if (n1->in(i) < n2->in(i))      return -1;
3959    else if (n1->in(i) > n2->in(i)) return 1;
3960  }
3961
3962  return 0;
3963}
3964
3965int Compile::cmp_expensive_nodes(Node** n1p, Node** n2p) {
3966  Node* n1 = *n1p;
3967  Node* n2 = *n2p;
3968
3969  return cmp_expensive_nodes(n1, n2);
3970}
3971
3972void Compile::sort_expensive_nodes() {
3973  if (!expensive_nodes_sorted()) {
3974    _expensive_nodes->sort(cmp_expensive_nodes);
3975  }
3976}
3977
3978bool Compile::expensive_nodes_sorted() const {
3979  for (int i = 1; i < _expensive_nodes->length(); i++) {
3980    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i-1)) < 0) {
3981      return false;
3982    }
3983  }
3984  return true;
3985}
3986
3987bool Compile::should_optimize_expensive_nodes(PhaseIterGVN &igvn) {
3988  if (_expensive_nodes->length() == 0) {
3989    return false;
3990  }
3991
3992  assert(OptimizeExpensiveOps, "optimization off?");
3993
3994  // Take this opportunity to remove dead nodes from the list
3995  int j = 0;
3996  for (int i = 0; i < _expensive_nodes->length(); i++) {
3997    Node* n = _expensive_nodes->at(i);
3998    if (!n->is_unreachable(igvn)) {
3999      assert(n->is_expensive(), "should be expensive");
4000      _expensive_nodes->at_put(j, n);
4001      j++;
4002    }
4003  }
4004  _expensive_nodes->trunc_to(j);
4005
4006  // Then sort the list so that similar nodes are next to each other
4007  // and check for at least two nodes of identical kind with same data
4008  // inputs.
4009  sort_expensive_nodes();
4010
4011  for (int i = 0; i < _expensive_nodes->length()-1; i++) {
4012    if (cmp_expensive_nodes(_expensive_nodes->adr_at(i), _expensive_nodes->adr_at(i+1)) == 0) {
4013      return true;
4014    }
4015  }
4016
4017  return false;
4018}
4019
4020void Compile::cleanup_expensive_nodes(PhaseIterGVN &igvn) {
4021  if (_expensive_nodes->length() == 0) {
4022    return;
4023  }
4024
4025  assert(OptimizeExpensiveOps, "optimization off?");
4026
4027  // Sort to bring similar nodes next to each other and clear the
4028  // control input of nodes for which there's only a single copy.
4029  sort_expensive_nodes();
4030
4031  int j = 0;
4032  int identical = 0;
4033  int i = 0;
4034  for (; i < _expensive_nodes->length()-1; i++) {
4035    assert(j <= i, "can't write beyond current index");
4036    if (_expensive_nodes->at(i)->Opcode() == _expensive_nodes->at(i+1)->Opcode()) {
4037      identical++;
4038      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
4039      continue;
4040    }
4041    if (identical > 0) {
4042      _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
4043      identical = 0;
4044    } else {
4045      Node* n = _expensive_nodes->at(i);
4046      igvn.hash_delete(n);
4047      n->set_req(0, NULL);
4048      igvn.hash_insert(n);
4049    }
4050  }
4051  if (identical > 0) {
4052    _expensive_nodes->at_put(j++, _expensive_nodes->at(i));
4053  } else if (_expensive_nodes->length() >= 1) {
4054    Node* n = _expensive_nodes->at(i);
4055    igvn.hash_delete(n);
4056    n->set_req(0, NULL);
4057    igvn.hash_insert(n);
4058  }
4059  _expensive_nodes->trunc_to(j);
4060}
4061
4062void Compile::add_expensive_node(Node * n) {
4063  assert(!_expensive_nodes->contains(n), "duplicate entry in expensive list");
4064  assert(n->is_expensive(), "expensive nodes with non-null control here only");
4065  assert(!n->is_CFG() && !n->is_Mem(), "no cfg or memory nodes here");
4066  if (OptimizeExpensiveOps) {
4067    _expensive_nodes->append(n);
4068  } else {
4069    // Clear control input and let IGVN optimize expensive nodes if
4070    // OptimizeExpensiveOps is off.
4071    n->set_req(0, NULL);
4072  }
4073}
4074
4075/**
4076 * Remove the speculative part of types and clean up the graph
4077 */
4078void Compile::remove_speculative_types(PhaseIterGVN &igvn) {
4079  if (UseTypeSpeculation) {
4080    Unique_Node_List worklist;
4081    worklist.push(root());
4082    int modified = 0;
4083    // Go over all type nodes that carry a speculative type, drop the
4084    // speculative part of the type and enqueue the node for an igvn
4085    // which may optimize it out.
4086    for (uint next = 0; next < worklist.size(); ++next) {
4087      Node *n  = worklist.at(next);
4088      if (n->is_Type()) {
4089        TypeNode* tn = n->as_Type();
4090        const Type* t = tn->type();
4091        const Type* t_no_spec = t->remove_speculative();
4092        if (t_no_spec != t) {
4093          bool in_hash = igvn.hash_delete(n);
4094          assert(in_hash, "node should be in igvn hash table");
4095          tn->set_type(t_no_spec);
4096          igvn.hash_insert(n);
4097          igvn._worklist.push(n); // give it a chance to go away
4098          modified++;
4099        }
4100      }
4101      uint max = n->len();
4102      for( uint i = 0; i < max; ++i ) {
4103        Node *m = n->in(i);
4104        if (not_a_node(m))  continue;
4105        worklist.push(m);
4106      }
4107    }
4108    // Drop the speculative part of all types in the igvn's type table
4109    igvn.remove_speculative_types();
4110    if (modified > 0) {
4111      igvn.optimize();
4112    }
4113#ifdef ASSERT
4114    // Verify that after the IGVN is over no speculative type has resurfaced
4115    worklist.clear();
4116    worklist.push(root());
4117    for (uint next = 0; next < worklist.size(); ++next) {
4118      Node *n  = worklist.at(next);
4119      const Type* t = igvn.type_or_null(n);
4120      assert((t == NULL) || (t == t->remove_speculative()), "no more speculative types");
4121      if (n->is_Type()) {
4122        t = n->as_Type()->type();
4123        assert(t == t->remove_speculative(), "no more speculative types");
4124      }
4125      uint max = n->len();
4126      for( uint i = 0; i < max; ++i ) {
4127        Node *m = n->in(i);
4128        if (not_a_node(m))  continue;
4129        worklist.push(m);
4130      }
4131    }
4132    igvn.check_no_speculative_types();
4133#endif
4134  }
4135}
4136
4137// Auxiliary method to support randomized stressing/fuzzing.
4138//
4139// This method can be called the arbitrary number of times, with current count
4140// as the argument. The logic allows selecting a single candidate from the
4141// running list of candidates as follows:
4142//    int count = 0;
4143//    Cand* selected = null;
4144//    while(cand = cand->next()) {
4145//      if (randomized_select(++count)) {
4146//        selected = cand;
4147//      }
4148//    }
4149//
4150// Including count equalizes the chances any candidate is "selected".
4151// This is useful when we don't have the complete list of candidates to choose
4152// from uniformly. In this case, we need to adjust the randomicity of the
4153// selection, or else we will end up biasing the selection towards the latter
4154// candidates.
4155//
4156// Quick back-envelope calculation shows that for the list of n candidates
4157// the equal probability for the candidate to persist as "best" can be
4158// achieved by replacing it with "next" k-th candidate with the probability
4159// of 1/k. It can be easily shown that by the end of the run, the
4160// probability for any candidate is converged to 1/n, thus giving the
4161// uniform distribution among all the candidates.
4162//
4163// We don't care about the domain size as long as (RANDOMIZED_DOMAIN / count) is large.
4164#define RANDOMIZED_DOMAIN_POW 29
4165#define RANDOMIZED_DOMAIN (1 << RANDOMIZED_DOMAIN_POW)
4166#define RANDOMIZED_DOMAIN_MASK ((1 << (RANDOMIZED_DOMAIN_POW + 1)) - 1)
4167bool Compile::randomized_select(int count) {
4168  assert(count > 0, "only positive");
4169  return (os::random() & RANDOMIZED_DOMAIN_MASK) < (RANDOMIZED_DOMAIN / count);
4170}
4171