matcher.cpp revision 164:c436414a719e
1/*
2 * Copyright 1997-2007 Sun Microsystems, Inc.  All Rights Reserved.
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * This code is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License version 2 only, as
7 * published by the Free Software Foundation.
8 *
9 * This code is distributed in the hope that it will be useful, but WITHOUT
10 * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 * version 2 for more details (a copy is included in the LICENSE file that
13 * accompanied this code).
14 *
15 * You should have received a copy of the GNU General Public License version
16 * 2 along with this work; if not, write to the Free Software Foundation,
17 * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18 *
19 * Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20 * CA 95054 USA or visit www.sun.com if you need additional information or
21 * have any questions.
22 *
23 */
24
25#include "incls/_precompiled.incl"
26#include "incls/_matcher.cpp.incl"
27
28OptoReg::Name OptoReg::c_frame_pointer;
29
30
31
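// Map each ideal Type::base() value to the ideal machine-register class that can
// hold it; the entries follow the order of the Type enumeration (see type.hpp).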
32const int Matcher::base2reg[Type::lastype] = {
33  Node::NotAMachineReg,0,0, Op_RegI, Op_RegL, 0, Op_RegN,
34  Node::NotAMachineReg, Node::NotAMachineReg, /* tuple, array */
35  Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, Op_RegP, /* the pointers */
36  0, 0/*abio*/,
37  Op_RegP /* Return address */, 0, /* the memories */
38  Op_RegF, Op_RegF, Op_RegF, Op_RegD, Op_RegD, Op_RegD,
39  0  /*bottom*/
40};
41
42const RegMask *Matcher::idealreg2regmask[_last_machine_leaf];
43RegMask Matcher::mreg2regmask[_last_Mach_Reg];
44RegMask Matcher::STACK_ONLY_mask;
45RegMask Matcher::c_frame_ptr_mask;
46const uint Matcher::_begin_rematerialize = _BEGIN_REMATERIALIZE;
47const uint Matcher::_end_rematerialize   = _END_REMATERIALIZE;
48
49//---------------------------Matcher-------------------------------------------
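// Most of the tables captured below (reduceOp, leftOp, rightOp, ruleName, the
// register save policies, etc.) are generated by the ADLC from the platform's
// architecture description (.ad) file.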
50Matcher::Matcher( Node_List &proj_list ) :
51  PhaseTransform( Phase::Ins_Select ),
52#ifdef ASSERT
53  _old2new_map(C->comp_arena()),
54#endif
55  _shared_constants(C->comp_arena()),
56  _reduceOp(reduceOp), _leftOp(leftOp), _rightOp(rightOp),
57  _swallowed(swallowed),
58  _begin_inst_chain_rule(_BEGIN_INST_CHAIN_RULE),
59  _end_inst_chain_rule(_END_INST_CHAIN_RULE),
60  _must_clone(must_clone), _proj_list(proj_list),
61  _register_save_policy(register_save_policy),
62  _c_reg_save_policy(c_reg_save_policy),
63  _register_save_type(register_save_type),
64  _ruleName(ruleName),
65  _allocation_started(false),
66  _states_arena(Chunk::medium_size),
67  _visited(&_states_arena),
68  _shared(&_states_arena),
69  _dontcare(&_states_arena) {
70  C->set_matcher(this);
71
72  idealreg2spillmask[Op_RegI] = NULL;
73  idealreg2spillmask[Op_RegN] = NULL;
74  idealreg2spillmask[Op_RegL] = NULL;
75  idealreg2spillmask[Op_RegF] = NULL;
76  idealreg2spillmask[Op_RegD] = NULL;
77  idealreg2spillmask[Op_RegP] = NULL;
78
79  idealreg2debugmask[Op_RegI] = NULL;
80  idealreg2debugmask[Op_RegN] = NULL;
81  idealreg2debugmask[Op_RegL] = NULL;
82  idealreg2debugmask[Op_RegF] = NULL;
83  idealreg2debugmask[Op_RegD] = NULL;
84  idealreg2debugmask[Op_RegP] = NULL;
85}
86
87//------------------------------warp_incoming_stk_arg------------------------
88// This warps a VMReg into an OptoReg::Name
89OptoReg::Name Matcher::warp_incoming_stk_arg( VMReg reg ) {
90  OptoReg::Name warped;
91  if( reg->is_stack() ) {  // Stack slot argument?
92    warped = OptoReg::add(_old_SP, reg->reg2stack() );
93    warped = OptoReg::add(warped, C->out_preserve_stack_slots());
94    if( warped >= _in_arg_limit )
95      _in_arg_limit = OptoReg::add(warped, 1); // Bump max stack slot seen
96    if (!RegMask::can_represent(warped)) {
97      // the compiler cannot represent this method's calling sequence
98      C->record_method_not_compilable_all_tiers("unsupported incoming calling sequence");
99      return OptoReg::Bad;
100    }
101    return warped;
102  }
103  return OptoReg::as_OptoReg(reg);
104}
105
106//---------------------------compute_old_SP------------------------------------
107OptoReg::Name Compile::compute_old_SP() {
108  int fixed    = fixed_slots();
109  int preserve = in_preserve_stack_slots();
110  return OptoReg::stack2reg(round_to(fixed + preserve, Matcher::stack_alignment_in_slots()));
111}
112
113
114
115#ifdef ASSERT
116void Matcher::verify_new_nodes_only(Node* xroot) {
117  // Make sure that the new graph only references new nodes
118  ResourceMark rm;
119  Unique_Node_List worklist;
120  VectorSet visited(Thread::current()->resource_area());
121  worklist.push(xroot);
122  while (worklist.size() > 0) {
123    Node* n = worklist.pop();
124    visited <<= n->_idx;
125    assert(C->node_arena()->contains(n), "dead node");
126    for (uint j = 0; j < n->req(); j++) {
127      Node* in = n->in(j);
128      if (in != NULL) {
129        assert(C->node_arena()->contains(in), "dead node");
130        if (!visited.test(in->_idx)) {
131          worklist.push(in);
132        }
133      }
134    }
135  }
136}
137#endif
138
139
140//---------------------------match---------------------------------------------
141void Matcher::match( ) {
142  // One-time initialization of some register masks.
143  init_spill_mask( C->root()->in(1) );
144  _return_addr_mask = return_addr();
145#ifdef _LP64
146  // Pointers take 2 slots in 64-bit land
147  _return_addr_mask.Insert(OptoReg::add(return_addr(),1));
148#endif
149
150  // Map a Java-signature return type into return register-value
151  // machine registers for 0, 1 and 2 returned values.
152  const TypeTuple *range = C->tf()->range();
153  if( range->cnt() > TypeFunc::Parms ) { // If not a void function
154    // Get ideal-register return type
155    int ireg = base2reg[range->field_at(TypeFunc::Parms)->base()];
156    // Get machine return register
157    uint sop = C->start()->Opcode();
158    OptoRegPair regs = return_value(ireg, false);
159
160    // And mask for same
161    _return_value_mask = RegMask(regs.first());
162    if( OptoReg::is_valid(regs.second()) )
163      _return_value_mask.Insert(regs.second());
164  }
165
166  // ---------------
167  // Frame Layout
168
169  // Need the method signature to determine the incoming argument types,
170  // because the types determine which registers the incoming arguments are
171  // in, and this affects the matched code.
172  const TypeTuple *domain = C->tf()->domain();
173  uint             argcnt = domain->cnt() - TypeFunc::Parms;
174  BasicType *sig_bt        = NEW_RESOURCE_ARRAY( BasicType, argcnt );
175  VMRegPair *vm_parm_regs  = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
176  _parm_regs               = NEW_RESOURCE_ARRAY( OptoRegPair, argcnt );
177  _calling_convention_mask = NEW_RESOURCE_ARRAY( RegMask, argcnt );
178  uint i;
179  for( i = 0; i<argcnt; i++ ) {
180    sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
181  }
182
183  // Pass array of ideal registers and length to USER code (from the AD file)
184  // that will convert this to an array of register numbers.
185  const StartNode *start = C->start();
186  start->calling_convention( sig_bt, vm_parm_regs, argcnt );
187#ifdef ASSERT
188  // Sanity check users' calling convention.  Real handy while trying to
189  // get the initial port correct.
190  { for (uint i = 0; i<argcnt; i++) {
191      if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
192        assert(domain->field_at(i+TypeFunc::Parms)==Type::HALF, "only allowed on halves" );
193        _parm_regs[i].set_bad();
194        continue;
195      }
196      VMReg parm_reg = vm_parm_regs[i].first();
197      assert(parm_reg->is_valid(), "invalid arg?");
198      if (parm_reg->is_reg()) {
199        OptoReg::Name opto_parm_reg = OptoReg::as_OptoReg(parm_reg);
200        assert(can_be_java_arg(opto_parm_reg) ||
201               C->stub_function() == CAST_FROM_FN_PTR(address, OptoRuntime::rethrow_C) ||
202               opto_parm_reg == inline_cache_reg(),
203               "parameters in register must be preserved by runtime stubs");
204      }
205      for (uint j = 0; j < i; j++) {
206        assert(parm_reg != vm_parm_regs[j].first(),
207               "calling conv. must produce distinct regs");
208      }
209    }
210  }
211#endif
212
213  // Do some initial frame layout.
214
215  // Compute the old incoming SP (may be called FP) as
216  //   OptoReg::stack0() + locks + in_preserve_stack_slots + pad2.
217  _old_SP = C->compute_old_SP();
218  assert( is_even(_old_SP), "must be even" );
219
220  // Compute highest incoming stack argument as
221  //   _old_SP + out_preserve_stack_slots + incoming argument size.
222  _in_arg_limit = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
223  assert( is_even(_in_arg_limit), "out_preserve must be even" );
224  for( i = 0; i < argcnt; i++ ) {
225    // Permit args to have no register
226    _calling_convention_mask[i].Clear();
227    if( !vm_parm_regs[i].first()->is_valid() && !vm_parm_regs[i].second()->is_valid() ) {
228      continue;
229    }
230    // calling_convention returns stack arguments as a count of
231    // slots beyond OptoReg::stack0()/VMRegImpl::stack0.  We need to convert this to
232    // the allocator's point of view, taking into account all the
233    // preserve area, locks & pad2.
234
235    OptoReg::Name reg1 = warp_incoming_stk_arg(vm_parm_regs[i].first());
236    if( OptoReg::is_valid(reg1))
237      _calling_convention_mask[i].Insert(reg1);
238
239    OptoReg::Name reg2 = warp_incoming_stk_arg(vm_parm_regs[i].second());
240    if( OptoReg::is_valid(reg2))
241      _calling_convention_mask[i].Insert(reg2);
242
243    // Save the biased stack-slot register numbers
244    _parm_regs[i].set_pair(reg2, reg1);
245  }
246
247  // Finally, make sure the incoming arguments take up an even number of
248  // words, in case the arguments or locals need to contain doubleword stack
249  // slots.  The rest of the system assumes that stack slot pairs (in
250  // particular, in the spill area) which look aligned will in fact be
251  // aligned relative to the stack pointer in the target machine.  Double
252  // stack slots will always be allocated aligned.
253  _new_SP = OptoReg::Name(round_to(_in_arg_limit, RegMask::SlotsPerLong));
254
255  // Compute highest outgoing stack argument as
256  //   _new_SP + out_preserve_stack_slots + max(outgoing argument size).
257  _out_arg_limit = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
258  assert( is_even(_out_arg_limit), "out_preserve must be even" );
259
260  if (!RegMask::can_represent(OptoReg::add(_out_arg_limit,-1))) {
261    // the compiler cannot represent this method's calling sequence
262    C->record_method_not_compilable("must be able to represent all call arguments in reg mask");
263  }
264
265  if (C->failing())  return;  // bailed out on incoming arg failure
266
267  // ---------------
268  // Collect roots of matcher trees.  Every node for which
269  // _shared[_idx] is cleared is guaranteed to not be shared, and thus
270  // can be a valid interior of some tree.
271  find_shared( C->root() );
272  find_shared( C->top() );
273
274  C->print_method("Before Matching", 2);
275
276  // Swap out to old-space; emptying new-space
277  Arena *old = C->node_arena()->move_contents(C->old_arena());
278
279  // Save debug and profile information for nodes in old space:
280  _old_node_note_array = C->node_note_array();
281  if (_old_node_note_array != NULL) {
282    C->set_node_note_array(new(C->comp_arena()) GrowableArray<Node_Notes*>
283                           (C->comp_arena(), _old_node_note_array->length(),
284                            0, NULL));
285  }
286
287  // Pre-size the new_node table to avoid the need for range checks.
288  grow_new_node_array(C->unique());
289
290  // Reset node counter so MachNodes start with _idx at 0
291  int nodes = C->unique(); // save value
292  C->set_unique(0);
293
294  // Recursively match trees from old space into new space.
295  // Correct leaves of new-space Nodes; they point to old-space.
296  _visited.Clear();             // Clear visit bits for xform call
297  C->set_cached_top_node(xform( C->top(), nodes ));
298  if (!C->failing()) {
299    Node* xroot =        xform( C->root(), 1 );
300    if (xroot == NULL) {
301      Matcher::soft_match_failure();  // recursive matching process failed
302      C->record_method_not_compilable("instruction match failed");
303    } else {
304      // During matching shared constants were attached to C->root()
305      // because xroot wasn't available yet, so transfer the uses to
306      // the xroot.
307      for( DUIterator_Fast jmax, j = C->root()->fast_outs(jmax); j < jmax; j++ ) {
308        Node* n = C->root()->fast_out(j);
309        if (C->node_arena()->contains(n)) {
310          assert(n->in(0) == C->root(), "should be control user");
311          n->set_req(0, xroot);
312          --j;
313          --jmax;
314        }
315      }
316
317      C->set_root(xroot->is_Root() ? xroot->as_Root() : NULL);
318#ifdef ASSERT
319      verify_new_nodes_only(xroot);
320#endif
321    }
322  }
323  if (C->top() == NULL || C->root() == NULL) {
324    C->record_method_not_compilable("graph lost"); // %%% cannot happen?
325  }
326  if (C->failing()) {
327    // delete old;
328    old->destruct_contents();
329    return;
330  }
331  assert( C->top(), "" );
332  assert( C->root(), "" );
333  validate_null_checks();
334
335  // Now smoke old-space
336  NOT_DEBUG( old->destruct_contents() );
337
338  // ------------------------
339  // Set up save-on-entry registers
340  Fixup_Save_On_Entry( );
341}
342
343
344//------------------------------Fixup_Save_On_Entry----------------------------
345// The stated purpose of this routine is to take care of save-on-entry
346// registers.  However, the overall goal of the Match phase is to convert into
347// machine-specific instructions which have RegMasks to guide allocation.
348// So what this procedure really does is put a valid RegMask on each input
349// to the machine-specific variations of all Return, TailCall and Halt
350// instructions.  It also adds edges to define the save-on-entry values (and of
351// course gives them a mask).
352
353static RegMask *init_input_masks( uint size, RegMask &ret_adr, RegMask &fp ) {
354  RegMask *rms = NEW_RESOURCE_ARRAY( RegMask, size );
355  // Do all the pre-defined register masks
356  rms[TypeFunc::Control  ] = RegMask::Empty;
357  rms[TypeFunc::I_O      ] = RegMask::Empty;
358  rms[TypeFunc::Memory   ] = RegMask::Empty;
359  rms[TypeFunc::ReturnAdr] = ret_adr;
360  rms[TypeFunc::FramePtr ] = fp;
361  return rms;
362}
363
364//---------------------------init_first_stack_mask-----------------------------
365// Create the initial stack mask used by values spilling to the stack.
366// Disallow any debug info in outgoing argument areas by setting the
367// initial mask accordingly.
368void Matcher::init_first_stack_mask() {
369
370  // Allocate storage for spill masks as masks for the appropriate load type.
371  RegMask *rms = (RegMask*)C->comp_arena()->Amalloc_D(sizeof(RegMask)*12);
372  idealreg2spillmask[Op_RegN] = &rms[0];
373  idealreg2spillmask[Op_RegI] = &rms[1];
374  idealreg2spillmask[Op_RegL] = &rms[2];
375  idealreg2spillmask[Op_RegF] = &rms[3];
376  idealreg2spillmask[Op_RegD] = &rms[4];
377  idealreg2spillmask[Op_RegP] = &rms[5];
378  idealreg2debugmask[Op_RegN] = &rms[6];
379  idealreg2debugmask[Op_RegI] = &rms[7];
380  idealreg2debugmask[Op_RegL] = &rms[8];
381  idealreg2debugmask[Op_RegF] = &rms[9];
382  idealreg2debugmask[Op_RegD] = &rms[10];
383  idealreg2debugmask[Op_RegP] = &rms[11];
384
385  OptoReg::Name i;
386
387  // At first, start with the empty mask
388  C->FIRST_STACK_mask().Clear();
389
390  // Add in the incoming argument area
391  OptoReg::Name init = OptoReg::add(_old_SP, C->out_preserve_stack_slots());
392  for (i = init; i < _in_arg_limit; i = OptoReg::add(i,1))
393    C->FIRST_STACK_mask().Insert(i);
394
395  // Add in all bits past the outgoing argument area
396  guarantee(RegMask::can_represent(OptoReg::add(_out_arg_limit,-1)),
397            "must be able to represent all call arguments in reg mask");
398  init = _out_arg_limit;
399  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
400    C->FIRST_STACK_mask().Insert(i);
401
402  // Finally, set the "infinite stack" bit.
403  C->FIRST_STACK_mask().set_AllStack();
404
405  // Make spill masks.  Registers for their class, plus FIRST_STACK_mask.
406#ifdef _LP64
407  *idealreg2spillmask[Op_RegN] = *idealreg2regmask[Op_RegN];
408   idealreg2spillmask[Op_RegN]->OR(C->FIRST_STACK_mask());
409#endif
410  *idealreg2spillmask[Op_RegI] = *idealreg2regmask[Op_RegI];
411   idealreg2spillmask[Op_RegI]->OR(C->FIRST_STACK_mask());
412  *idealreg2spillmask[Op_RegL] = *idealreg2regmask[Op_RegL];
413   idealreg2spillmask[Op_RegL]->OR(C->FIRST_STACK_mask());
414  *idealreg2spillmask[Op_RegF] = *idealreg2regmask[Op_RegF];
415   idealreg2spillmask[Op_RegF]->OR(C->FIRST_STACK_mask());
416  *idealreg2spillmask[Op_RegD] = *idealreg2regmask[Op_RegD];
417   idealreg2spillmask[Op_RegD]->OR(C->FIRST_STACK_mask());
418  *idealreg2spillmask[Op_RegP] = *idealreg2regmask[Op_RegP];
419   idealreg2spillmask[Op_RegP]->OR(C->FIRST_STACK_mask());
420
421  // Make up debug masks.  Any spill slot plus callee-save registers.
422  // Caller-save registers are assumed to be trashable by the various
423  // inline-cache fixup routines.
424  *idealreg2debugmask[Op_RegN]= *idealreg2spillmask[Op_RegN];
425  *idealreg2debugmask[Op_RegI]= *idealreg2spillmask[Op_RegI];
426  *idealreg2debugmask[Op_RegL]= *idealreg2spillmask[Op_RegL];
427  *idealreg2debugmask[Op_RegF]= *idealreg2spillmask[Op_RegF];
428  *idealreg2debugmask[Op_RegD]= *idealreg2spillmask[Op_RegD];
429  *idealreg2debugmask[Op_RegP]= *idealreg2spillmask[Op_RegP];
430
431  // Prevent stub compilations from attempting to reference
432  // callee-saved registers from debug info
433  bool exclude_soe = !Compile::current()->is_method_compilation();
434
435  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
436    // registers the caller has to save do not work
437    if( _register_save_policy[i] == 'C' ||
438        _register_save_policy[i] == 'A' ||
439        (_register_save_policy[i] == 'E' && exclude_soe) ) {
440      idealreg2debugmask[Op_RegN]->Remove(i);
441      idealreg2debugmask[Op_RegI]->Remove(i); // Exclude save-on-call
442      idealreg2debugmask[Op_RegL]->Remove(i); // registers from debug
443      idealreg2debugmask[Op_RegF]->Remove(i); // masks
444      idealreg2debugmask[Op_RegD]->Remove(i);
445      idealreg2debugmask[Op_RegP]->Remove(i);
446    }
447  }
448}
449
450//---------------------------is_save_on_entry----------------------------------
451bool Matcher::is_save_on_entry( int reg ) {
452  return
453    _register_save_policy[reg] == 'E' ||
454    _register_save_policy[reg] == 'A' || // Save-on-entry register?
455    // Also save argument registers in the trampolining stubs
456    (C->save_argument_registers() && is_spillable_arg(reg));
457}
458
459//---------------------------Fixup_Save_On_Entry-------------------------------
460void Matcher::Fixup_Save_On_Entry( ) {
461  init_first_stack_mask();
462
463  Node *root = C->root();       // Short name for root
464  // Count number of save-on-entry registers.
465  uint soe_cnt = number_of_saved_registers();
466  uint i;
467
468  // Find the procedure Start Node
469  StartNode *start = C->start();
470  assert( start, "Expect a start node" );
471
472  // Save argument registers in the trampolining stubs
473  if( C->save_argument_registers() )
474    for( i = 0; i < _last_Mach_Reg; i++ )
475      if( is_spillable_arg(i) )
476        soe_cnt++;
477
478  // Input RegMask array shared by all Returns.
479  // The type for doubles and longs has a count of 2, but
480  // there is only 1 returned value
481  uint ret_edge_cnt = TypeFunc::Parms + ((C->tf()->range()->cnt() == TypeFunc::Parms) ? 0 : 1);
482  RegMask *ret_rms  = init_input_masks( ret_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
483  // Returns have 0 or 1 returned values depending on call signature.
484  // Return register is specified by return_value in the AD file.
485  if (ret_edge_cnt > TypeFunc::Parms)
486    ret_rms[TypeFunc::Parms+0] = _return_value_mask;
487
488  // Input RegMask array shared by all Rethrows.
489  uint reth_edge_cnt = TypeFunc::Parms+1;
490  RegMask *reth_rms  = init_input_masks( reth_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
491  // Rethrow takes exception oop only, but in the argument 0 slot.
492  reth_rms[TypeFunc::Parms] = mreg2regmask[find_receiver(false)];
493#ifdef _LP64
494  // Need two slots for ptrs in 64-bit land
495  reth_rms[TypeFunc::Parms].Insert(OptoReg::add(OptoReg::Name(find_receiver(false)),1));
496#endif
497
498  // Input RegMask array shared by all TailCalls
499  uint tail_call_edge_cnt = TypeFunc::Parms+2;
500  RegMask *tail_call_rms = init_input_masks( tail_call_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
501
502  // Input RegMask array shared by all TailJumps
503  uint tail_jump_edge_cnt = TypeFunc::Parms+2;
504  RegMask *tail_jump_rms = init_input_masks( tail_jump_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
505
506  // TailCalls have 2 returned values (target & moop), whose masks come
507  // from the usual MachNode/MachOper mechanism.  Find a sample
508  // TailCall to extract these masks and put the correct masks into
509  // the tail_call_rms array.
510  for( i=1; i < root->req(); i++ ) {
511    MachReturnNode *m = root->in(i)->as_MachReturn();
512    if( m->ideal_Opcode() == Op_TailCall ) {
513      tail_call_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
514      tail_call_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
515      break;
516    }
517  }
518
519  // TailJumps have 2 returned values (target & ex_oop), whose masks come
520  // from the usual MachNode/MachOper mechanism.  Find a sample
521  // TailJump to extract these masks and put the correct masks into
522  // the tail_jump_rms array.
523  for( i=1; i < root->req(); i++ ) {
524    MachReturnNode *m = root->in(i)->as_MachReturn();
525    if( m->ideal_Opcode() == Op_TailJump ) {
526      tail_jump_rms[TypeFunc::Parms+0] = m->MachNode::in_RegMask(TypeFunc::Parms+0);
527      tail_jump_rms[TypeFunc::Parms+1] = m->MachNode::in_RegMask(TypeFunc::Parms+1);
528      break;
529    }
530  }
531
532  // Input RegMask array shared by all Halts
533  uint halt_edge_cnt = TypeFunc::Parms;
534  RegMask *halt_rms = init_input_masks( halt_edge_cnt + soe_cnt, _return_addr_mask, c_frame_ptr_mask );
535
536  // Capture the return input masks into each exit flavor
537  for( i=1; i < root->req(); i++ ) {
538    MachReturnNode *exit = root->in(i)->as_MachReturn();
539    switch( exit->ideal_Opcode() ) {
540      case Op_Return   : exit->_in_rms = ret_rms;  break;
541      case Op_Rethrow  : exit->_in_rms = reth_rms; break;
542      case Op_TailCall : exit->_in_rms = tail_call_rms; break;
543      case Op_TailJump : exit->_in_rms = tail_jump_rms; break;
544      case Op_Halt     : exit->_in_rms = halt_rms; break;
545      default          : ShouldNotReachHere();
546    }
547  }
548
549  // Next unused projection number from Start.
550  int proj_cnt = C->tf()->domain()->cnt();
551
552  // Do all the save-on-entry registers.  Make projections from Start for
553  // them, and give them a use at the exit points.  To the allocator, they
554  // look like incoming register arguments.
555  for( i = 0; i < _last_Mach_Reg; i++ ) {
556    if( is_save_on_entry(i) ) {
557
558      // Add the save-on-entry to the mask array
559      ret_rms      [      ret_edge_cnt] = mreg2regmask[i];
560      reth_rms     [     reth_edge_cnt] = mreg2regmask[i];
561      tail_call_rms[tail_call_edge_cnt] = mreg2regmask[i];
562      tail_jump_rms[tail_jump_edge_cnt] = mreg2regmask[i];
563      // Halts need the SOE registers, but only in the stack as debug info.
564      // A just-prior uncommon-trap or deoptimization will use the SOE regs.
565      halt_rms     [     halt_edge_cnt] = *idealreg2spillmask[_register_save_type[i]];
566
567      Node *mproj;
568
569      // Is this a RegF low half of a RegD?  Double up 2 adjacent RegF's
570      // into a single RegD.
571      if( (i&1) == 0 &&
572          _register_save_type[i  ] == Op_RegF &&
573          _register_save_type[i+1] == Op_RegF &&
574          is_save_on_entry(i+1) ) {
575        // Add other bit for double
576        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
577        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
578        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
579        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
580        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
581        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegD );
582        proj_cnt += 2;          // Skip 2 for doubles
583      }
584      else if( (i&1) == 1 &&    // Else check for high half of double
585               _register_save_type[i-1] == Op_RegF &&
586               _register_save_type[i  ] == Op_RegF &&
587               is_save_on_entry(i-1) ) {
588        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
589        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
590        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
591        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
592        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
593        mproj = C->top();
594      }
595      // Is this a RegI low half of a RegL?  Double up 2 adjacent RegI's
596      // into a single RegL.
597      else if( (i&1) == 0 &&
598          _register_save_type[i  ] == Op_RegI &&
599          _register_save_type[i+1] == Op_RegI &&
600        is_save_on_entry(i+1) ) {
601        // Add other bit for long
602        ret_rms      [      ret_edge_cnt].Insert(OptoReg::Name(i+1));
603        reth_rms     [     reth_edge_cnt].Insert(OptoReg::Name(i+1));
604        tail_call_rms[tail_call_edge_cnt].Insert(OptoReg::Name(i+1));
605        tail_jump_rms[tail_jump_edge_cnt].Insert(OptoReg::Name(i+1));
606        halt_rms     [     halt_edge_cnt].Insert(OptoReg::Name(i+1));
607        mproj = new (C, 1) MachProjNode( start, proj_cnt, ret_rms[ret_edge_cnt], Op_RegL );
608        proj_cnt += 2;          // Skip 2 for longs
609      }
610      else if( (i&1) == 1 &&    // Else check for high half of long
611               _register_save_type[i-1] == Op_RegI &&
612               _register_save_type[i  ] == Op_RegI &&
613               is_save_on_entry(i-1) ) {
614        ret_rms      [      ret_edge_cnt] = RegMask::Empty;
615        reth_rms     [     reth_edge_cnt] = RegMask::Empty;
616        tail_call_rms[tail_call_edge_cnt] = RegMask::Empty;
617        tail_jump_rms[tail_jump_edge_cnt] = RegMask::Empty;
618        halt_rms     [     halt_edge_cnt] = RegMask::Empty;
619        mproj = C->top();
620      } else {
621        // Make a projection for it off the Start
622        mproj = new (C, 1) MachProjNode( start, proj_cnt++, ret_rms[ret_edge_cnt], _register_save_type[i] );
623      }
624
625      ret_edge_cnt ++;
626      reth_edge_cnt ++;
627      tail_call_edge_cnt ++;
628      tail_jump_edge_cnt ++;
629      halt_edge_cnt ++;
630
631      // Add a use of the SOE register to all exit paths
632      for( uint j=1; j < root->req(); j++ )
633        root->in(j)->add_req(mproj);
634    } // End of if a save-on-entry register
635  } // End of for all machine registers
636}
637
638//------------------------------init_spill_mask--------------------------------
639void Matcher::init_spill_mask( Node *ret ) {
640  if( idealreg2regmask[Op_RegI] ) return; // One time only init
641
642  OptoReg::c_frame_pointer = c_frame_pointer();
643  c_frame_ptr_mask = c_frame_pointer();
644#ifdef _LP64
645  // pointers are twice as big
646  c_frame_ptr_mask.Insert(OptoReg::add(c_frame_pointer(),1));
647#endif
648
649  // Start at OptoReg::stack0()
650  STACK_ONLY_mask.Clear();
651  OptoReg::Name init = OptoReg::stack2reg(0);
652  // STACK_ONLY_mask is all stack bits
653  OptoReg::Name i;
654  for (i = init; RegMask::can_represent(i); i = OptoReg::add(i,1))
655    STACK_ONLY_mask.Insert(i);
656  // Also set the "infinite stack" bit.
657  STACK_ONLY_mask.set_AllStack();
658
659  // Copy the register names over into the shared world
660  for( i=OptoReg::Name(0); i<OptoReg::Name(_last_Mach_Reg); i = OptoReg::add(i,1) ) {
661    // SharedInfo::regName[i] = regName[i];
662    // Handy RegMasks per machine register
663    mreg2regmask[i].Insert(i);
664  }
665
666  // Grab the Frame Pointer
667  Node *fp  = ret->in(TypeFunc::FramePtr);
668  Node *mem = ret->in(TypeFunc::Memory);
669  const TypePtr* atp = TypePtr::BOTTOM;
670  // Share frame pointer while making spill ops
671  set_shared(fp);
672
673  // Compute generic short-offset Loads
674#ifdef _LP64
675  MachNode *spillCP = match_tree(new (C, 3) LoadNNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
676#endif
677  MachNode *spillI  = match_tree(new (C, 3) LoadINode(NULL,mem,fp,atp));
678  MachNode *spillL  = match_tree(new (C, 3) LoadLNode(NULL,mem,fp,atp));
679  MachNode *spillF  = match_tree(new (C, 3) LoadFNode(NULL,mem,fp,atp));
680  MachNode *spillD  = match_tree(new (C, 3) LoadDNode(NULL,mem,fp,atp));
681  MachNode *spillP  = match_tree(new (C, 3) LoadPNode(NULL,mem,fp,atp,TypeInstPtr::BOTTOM));
682  assert(spillI != NULL && spillL != NULL && spillF != NULL &&
683         spillD != NULL && spillP != NULL, "");
684
685  // Get the ADLC notion of the right regmask, for each basic type.
686#ifdef _LP64
687  idealreg2regmask[Op_RegN] = &spillCP->out_RegMask();
688#endif
689  idealreg2regmask[Op_RegI] = &spillI->out_RegMask();
690  idealreg2regmask[Op_RegL] = &spillL->out_RegMask();
691  idealreg2regmask[Op_RegF] = &spillF->out_RegMask();
692  idealreg2regmask[Op_RegD] = &spillD->out_RegMask();
693  idealreg2regmask[Op_RegP] = &spillP->out_RegMask();
694}
695
696#ifdef ASSERT
697static void match_alias_type(Compile* C, Node* n, Node* m) {
698  if (!VerifyAliases)  return;  // do not go looking for trouble by default
699  const TypePtr* nat = n->adr_type();
700  const TypePtr* mat = m->adr_type();
701  int nidx = C->get_alias_index(nat);
702  int midx = C->get_alias_index(mat);
703  // Detune the assert for cases like (AndI 0xFF (LoadB p)).
704  if (nidx == Compile::AliasIdxTop && midx >= Compile::AliasIdxRaw) {
705    for (uint i = 1; i < n->req(); i++) {
706      Node* n1 = n->in(i);
707      const TypePtr* n1at = n1->adr_type();
708      if (n1at != NULL) {
709        nat = n1at;
710        nidx = C->get_alias_index(n1at);
711      }
712    }
713  }
714  // %%% Kludgery.  Instead, fix ideal adr_type methods for all these cases:
715  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxRaw) {
716    switch (n->Opcode()) {
717    case Op_PrefetchRead:
718    case Op_PrefetchWrite:
719      nidx = Compile::AliasIdxRaw;
720      nat = TypeRawPtr::BOTTOM;
721      break;
722    }
723  }
724  if (nidx == Compile::AliasIdxRaw && midx == Compile::AliasIdxTop) {
725    switch (n->Opcode()) {
726    case Op_ClearArray:
727      midx = Compile::AliasIdxRaw;
728      mat = TypeRawPtr::BOTTOM;
729      break;
730    }
731  }
732  if (nidx == Compile::AliasIdxTop && midx == Compile::AliasIdxBot) {
733    switch (n->Opcode()) {
734    case Op_Return:
735    case Op_Rethrow:
736    case Op_Halt:
737    case Op_TailCall:
738    case Op_TailJump:
739      nidx = Compile::AliasIdxBot;
740      nat = TypePtr::BOTTOM;
741      break;
742    }
743  }
744  if (nidx == Compile::AliasIdxBot && midx == Compile::AliasIdxTop) {
745    switch (n->Opcode()) {
746    case Op_StrComp:
747    case Op_MemBarVolatile:
748    case Op_MemBarCPUOrder: // %%% these ideals should have narrower adr_type?
749      nidx = Compile::AliasIdxTop;
750      nat = NULL;
751      break;
752    }
753  }
754  if (nidx != midx) {
755    if (PrintOpto || (PrintMiscellaneous && (WizardMode || Verbose))) {
756      tty->print_cr("==== Matcher alias shift %d => %d", nidx, midx);
757      n->dump();
758      m->dump();
759    }
760    assert(C->subsume_loads() && C->must_alias(nat, midx),
761           "must not lose alias info when matching");
762  }
763}
764#endif
765
766
767//------------------------------MStack-----------------------------------------
768// State and MStack class used in xform() and find_shared() iterative methods.
769enum Node_State { Pre_Visit,  // node has to be pre-visited
770                      Visit,  // visit node
771                 Post_Visit,  // post-visit node
772             Alt_Post_Visit   // alternative post-visit path
773                };
774
775class MStack: public Node_Stack {
776  public:
777    MStack(int size) : Node_Stack(size) { }
778
779    void push(Node *n, Node_State ns) {
780      Node_Stack::push(n, (uint)ns);
781    }
782    void push(Node *n, Node_State ns, Node *parent, int indx) {
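      // Push the parent/index entry first, then the child/state entry on top;
      // parent() and state() below rely on this two-entry layout.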
783      ++_inode_top;
784      if ((_inode_top + 1) >= _inode_max) grow();
785      _inode_top->node = parent;
786      _inode_top->indx = (uint)indx;
787      ++_inode_top;
788      _inode_top->node = n;
789      _inode_top->indx = (uint)ns;
790    }
791    Node *parent() {
792      pop();
793      return node();
794    }
795    Node_State state() const {
796      return (Node_State)index();
797    }
798    void set_state(Node_State ns) {
799      set_index((uint)ns);
800    }
801};
802
803
804//------------------------------xform------------------------------------------
805// Given a Node in old-space, Match him (Label/Reduce) to produce a machine
806// Node in new-space.  Given a new-space Node, recursively walk his children.
807Node *Matcher::transform( Node *n ) { ShouldNotCallThis(); return n; }
808Node *Matcher::xform( Node *n, int max_stack ) {
809  // Use one stack to keep both: child's node/state and parent's node/index
810  MStack mstack(max_stack * 2 * 2); // C->unique() * 2 * 2
811  mstack.push(n, Visit, NULL, -1);  // set NULL as parent to indicate root
812
813  while (mstack.is_nonempty()) {
814    n = mstack.node();          // Leave node on stack
815    Node_State nstate = mstack.state();
816    if (nstate == Visit) {
817      mstack.set_state(Post_Visit);
818      Node *oldn = n;
819      // Old-space or new-space check
820      if (!C->node_arena()->contains(n)) {
821        // Old space!
822        Node* m;
823        if (has_new_node(n)) {  // Already Label/Reduced once; reuse it
824          m = new_node(n);
825        } else {
826          if (!is_dontcare(n)) { // Matcher can match this guy
827            // Calls match special.  They match alone with no children.
828            // Their children, the incoming arguments, match normally.
829            m = n->is_SafePoint() ? match_sfpt(n->as_SafePoint()):match_tree(n);
830            if (C->failing())  return NULL;
831            if (m == NULL) { Matcher::soft_match_failure(); return NULL; }
832          } else {                  // Nothing the matcher cares about
833            if( n->is_Proj() && n->in(0)->is_Multi()) {       // Projections?
834              // Convert to machine-dependent projection
835              m = n->in(0)->as_Multi()->match( n->as_Proj(), this );
836              if (m->in(0) != NULL) // m might be top
837                collect_null_checks(m);
838            } else {                // Else just a regular 'ol guy
839              m = n->clone();       // So just clone into new-space
840              // Def-Use edges will be added incrementally as Uses
841              // of this node are matched.
842              assert(m->outcnt() == 0, "no Uses of this clone yet");
843            }
844          }
845
846          set_new_node(n, m);       // Map old to new
847          if (_old_node_note_array != NULL) {
848            Node_Notes* nn = C->locate_node_notes(_old_node_note_array,
849                                                  n->_idx);
850            C->set_node_notes_at(m->_idx, nn);
851          }
852          debug_only(match_alias_type(C, n, m));
853        }
854        n = m;    // n is now a new-space node
855        mstack.set_node(n);
856      }
857
858      // New space!
859      if (_visited.test_set(n->_idx)) continue; // while(mstack.is_nonempty())
860
861      int i;
862      // Put precedence edges on stack first (match them last).
863      for (i = oldn->req(); (uint)i < oldn->len(); i++) {
864        Node *m = oldn->in(i);
865        if (m == NULL) break;
866        // set -1 to call add_prec() instead of set_req() during Step1
867        mstack.push(m, Visit, n, -1);
868      }
869
870      // For constant debug info, I'd rather have unmatched constants.
871      int cnt = n->req();
872      JVMState* jvms = n->jvms();
873      int debug_cnt = jvms ? jvms->debug_start() : cnt;
874
875      // Now do only debug info.  Clone constants rather than matching.
876      // Constants are represented directly in the debug info without
877      // the need for executable machine instructions.
878      // Monitor boxes are also represented directly.
879      for (i = cnt - 1; i >= debug_cnt; --i) { // For all debug inputs do
880        Node *m = n->in(i);          // Get input
881        int op = m->Opcode();
882        assert((op == Op_BoxLock) == jvms->is_monitor_use(i), "boxes only at monitor sites");
883        if( op == Op_ConI || op == Op_ConP || op == Op_ConN ||
884            op == Op_ConF || op == Op_ConD || op == Op_ConL
885            // || op == Op_BoxLock  // %%%% enable this and remove (+++) in chaitin.cpp
886            ) {
887          m = m->clone();
888          mstack.push(m, Post_Visit, n, i); // Don't need to visit
889          mstack.push(m->in(0), Visit, m, 0);
890        } else {
891          mstack.push(m, Visit, n, i);
892        }
893      }
894
895      // And now walk his children, and convert his inputs to new-space.
896      for( ; i >= 0; --i ) { // For all normal inputs do
897        Node *m = n->in(i);  // Get input
898        if(m != NULL)
899          mstack.push(m, Visit, n, i);
900      }
901
902    }
903    else if (nstate == Post_Visit) {
904      // Set xformed input
905      Node *p = mstack.parent();
906      if (p != NULL) { // root doesn't have parent
907        int i = (int)mstack.index();
908        if (i >= 0)
909          p->set_req(i, n); // required input
910        else if (i == -1)
911          p->add_prec(n);   // precedence input
912        else
913          ShouldNotReachHere();
914      }
915      mstack.pop(); // remove processed node from stack
916    }
917    else {
918      ShouldNotReachHere();
919    }
920  } // while (mstack.is_nonempty())
921  return n; // Return new-space Node
922}
923
924//------------------------------warp_outgoing_stk_arg------------------------
925OptoReg::Name Matcher::warp_outgoing_stk_arg( VMReg reg, OptoReg::Name begin_out_arg_area, OptoReg::Name &out_arg_limit_per_call ) {
926  // Convert outgoing argument location to a pre-biased stack offset
927  if (reg->is_stack()) {
928    OptoReg::Name warped = reg->reg2stack();
929    // Adjust the stack slot offset to be the register number used
930    // by the allocator.
931    warped = OptoReg::add(begin_out_arg_area, warped);
932    // Keep track of the largest numbered stack slot used for an arg.
933    // Largest used slot per call-site indicates the amount of stack
934    // that is killed by the call.
935    if( warped >= out_arg_limit_per_call )
936      out_arg_limit_per_call = OptoReg::add(warped,1);
937    if (!RegMask::can_represent(warped)) {
938      C->record_method_not_compilable_all_tiers("unsupported calling sequence");
939      return OptoReg::Bad;
940    }
941    return warped;
942  }
943  return OptoReg::as_OptoReg(reg);
944}
945
946
947//------------------------------match_sfpt-------------------------------------
948// Helper function to match call instructions.  Calls match special.
949// They match alone with no children.  Their children, the incoming
950// arguments, match normally.
951MachNode *Matcher::match_sfpt( SafePointNode *sfpt ) {
952  MachSafePointNode *msfpt = NULL;
953  MachCallNode      *mcall = NULL;
954  uint               cnt;
955  // Split out case for SafePoint vs Call
956  CallNode *call;
957  const TypeTuple *domain;
958  ciMethod*        method = NULL;
959  if( sfpt->is_Call() ) {
960    call = sfpt->as_Call();
961    domain = call->tf()->domain();
962    cnt = domain->cnt();
963
964    // Match just the call, nothing else
965    MachNode *m = match_tree(call);
966    if (C->failing())  return NULL;
967    if( m == NULL ) { Matcher::soft_match_failure(); return NULL; }
968
969    // Copy data from the Ideal SafePoint to the machine version
970    mcall = m->as_MachCall();
971
972    mcall->set_tf(         call->tf());
973    mcall->set_entry_point(call->entry_point());
974    mcall->set_cnt(        call->cnt());
975
976    if( mcall->is_MachCallJava() ) {
977      MachCallJavaNode *mcall_java  = mcall->as_MachCallJava();
978      const CallJavaNode *call_java =  call->as_CallJava();
979      method = call_java->method();
980      mcall_java->_method = method;
981      mcall_java->_bci = call_java->_bci;
982      mcall_java->_optimized_virtual = call_java->is_optimized_virtual();
983      if( mcall_java->is_MachCallStaticJava() )
984        mcall_java->as_MachCallStaticJava()->_name =
985         call_java->as_CallStaticJava()->_name;
986      if( mcall_java->is_MachCallDynamicJava() )
987        mcall_java->as_MachCallDynamicJava()->_vtable_index =
988         call_java->as_CallDynamicJava()->_vtable_index;
989    }
990    else if( mcall->is_MachCallRuntime() ) {
991      mcall->as_MachCallRuntime()->_name = call->as_CallRuntime()->_name;
992    }
993    msfpt = mcall;
994  }
995  // This is a non-call safepoint
996  else {
997    call = NULL;
998    domain = NULL;
999    MachNode *mn = match_tree(sfpt);
1000    if (C->failing())  return NULL;
1001    msfpt = mn->as_MachSafePoint();
1002    cnt = TypeFunc::Parms;
1003  }
1004
1005  // Advertise the correct memory effects (for anti-dependence computation).
1006  msfpt->set_adr_type(sfpt->adr_type());
1007
1008  // Allocate a private array of RegMasks.  These RegMasks are not shared.
1009  msfpt->_in_rms = NEW_RESOURCE_ARRAY( RegMask, cnt );
1010  // Empty them all.
1011  memset( msfpt->_in_rms, 0, sizeof(RegMask)*cnt );
1012
1013  // Do all the pre-defined non-Empty register masks
1014  msfpt->_in_rms[TypeFunc::ReturnAdr] = _return_addr_mask;
1015  msfpt->_in_rms[TypeFunc::FramePtr ] = c_frame_ptr_mask;
1016
1017  // Place where the first outgoing argument can possibly be put.
1018  OptoReg::Name begin_out_arg_area = OptoReg::add(_new_SP, C->out_preserve_stack_slots());
1019  assert( is_even(begin_out_arg_area), "" );
1020  // Compute max outgoing register number per call site.
1021  OptoReg::Name out_arg_limit_per_call = begin_out_arg_area;
1022  // Calls to C may hammer extra stack slots above and beyond any arguments.
1023  // These are usually backing store for register arguments for varargs.
1024  if( call != NULL && call->is_CallRuntime() )
1025    out_arg_limit_per_call = OptoReg::add(out_arg_limit_per_call,C->varargs_C_out_slots_killed());
1026
1027
1028  // Do the normal argument list (parameters) register masks
1029  int argcnt = cnt - TypeFunc::Parms;
1030  if( argcnt > 0 ) {          // Skip it all if we have no args
1031    BasicType *sig_bt  = NEW_RESOURCE_ARRAY( BasicType, argcnt );
1032    VMRegPair *parm_regs = NEW_RESOURCE_ARRAY( VMRegPair, argcnt );
1033    int i;
1034    for( i = 0; i < argcnt; i++ ) {
1035      sig_bt[i] = domain->field_at(i+TypeFunc::Parms)->basic_type();
1036    }
1037    // V-call to pick proper calling convention
1038    call->calling_convention( sig_bt, parm_regs, argcnt );
1039
1040#ifdef ASSERT
1041    // Sanity check users' calling convention.  Really handy during
1042    // the initial porting effort.  Fairly expensive otherwise.
1043    { for (int i = 0; i<argcnt; i++) {
1044      if( !parm_regs[i].first()->is_valid() &&
1045          !parm_regs[i].second()->is_valid() ) continue;
1046      VMReg reg1 = parm_regs[i].first();
1047      VMReg reg2 = parm_regs[i].second();
1048      for (int j = 0; j < i; j++) {
1049        if( !parm_regs[j].first()->is_valid() &&
1050            !parm_regs[j].second()->is_valid() ) continue;
1051        VMReg reg3 = parm_regs[j].first();
1052        VMReg reg4 = parm_regs[j].second();
1053        if( !reg1->is_valid() ) {
1054          assert( !reg2->is_valid(), "valid halvsies" );
1055        } else if( !reg3->is_valid() ) {
1056          assert( !reg4->is_valid(), "valid halvsies" );
1057        } else {
1058          assert( reg1 != reg2, "calling conv. must produce distinct regs");
1059          assert( reg1 != reg3, "calling conv. must produce distinct regs");
1060          assert( reg1 != reg4, "calling conv. must produce distinct regs");
1061          assert( reg2 != reg3, "calling conv. must produce distinct regs");
1062          assert( reg2 != reg4 || !reg2->is_valid(), "calling conv. must produce distinct regs");
1063          assert( reg3 != reg4, "calling conv. must produce distinct regs");
1064        }
1065      }
1066    }
1067    }
1068#endif
1069
1070    // Visit each argument.  Compute its outgoing register mask.
1071    // Return results now can have 2 bits returned.
1072    // Compute max over all outgoing arguments both per call-site
1073    // and over the entire method.
1074    for( i = 0; i < argcnt; i++ ) {
1075      // Address of incoming argument mask to fill in
1076      RegMask *rm = &mcall->_in_rms[i+TypeFunc::Parms];
1077      if( !parm_regs[i].first()->is_valid() &&
1078          !parm_regs[i].second()->is_valid() ) {
1079        continue;               // Avoid Halves
1080      }
1081      // Grab first register, adjust stack slots and insert in mask.
1082      OptoReg::Name reg1 = warp_outgoing_stk_arg(parm_regs[i].first(), begin_out_arg_area, out_arg_limit_per_call );
1083      if (OptoReg::is_valid(reg1))
1084        rm->Insert( reg1 );
1085      // Grab second register (if any), adjust stack slots and insert in mask.
1086      OptoReg::Name reg2 = warp_outgoing_stk_arg(parm_regs[i].second(), begin_out_arg_area, out_arg_limit_per_call );
1087      if (OptoReg::is_valid(reg2))
1088        rm->Insert( reg2 );
1089    } // End of for all arguments
1090
1091    // Compute number of stack slots needed to restore stack in case of
1092    // Pascal-style argument popping.
1093    mcall->_argsize = out_arg_limit_per_call - begin_out_arg_area;
1094  }
1095
1096  // Compute the max stack slot killed by any call.  These will not be
1097  // available for debug info, and will be used to adjust FIRST_STACK_mask
1098  // after all call sites have been visited.
1099  if( _out_arg_limit < out_arg_limit_per_call)
1100    _out_arg_limit = out_arg_limit_per_call;
1101
1102  if (mcall) {
1103    // Kill the outgoing argument area, including any non-argument holes and
1104    // any legacy C-killed slots.  Use Fat-Projections to do the killing.
1105    // Since the max-per-method covers the max-per-call-site and debug info
1106    // is excluded on the max-per-method basis, debug info cannot land in
1107    // this killed area.
1108    uint r_cnt = mcall->tf()->range()->cnt();
1109    MachProjNode *proj = new (C, 1) MachProjNode( mcall, r_cnt+10000, RegMask::Empty, MachProjNode::fat_proj );
1110    if (!RegMask::can_represent(OptoReg::Name(out_arg_limit_per_call-1))) {
1111      C->record_method_not_compilable_all_tiers("unsupported outgoing calling sequence");
1112    } else {
1113      for (int i = begin_out_arg_area; i < out_arg_limit_per_call; i++)
1114        proj->_rout.Insert(OptoReg::Name(i));
1115    }
1116    if( proj->_rout.is_NotEmpty() )
1117      _proj_list.push(proj);
1118  }
1119  // Transfer the safepoint information from the call to the mcall
1120  // Move the JVMState list
1121  msfpt->set_jvms(sfpt->jvms());
1122  for (JVMState* jvms = msfpt->jvms(); jvms; jvms = jvms->caller()) {
1123    jvms->set_map(sfpt);
1124  }
1125
1126  // Debug inputs begin just after the last incoming parameter
1127  assert( (mcall == NULL) || (mcall->jvms() == NULL) ||
1128          (mcall->jvms()->debug_start() + mcall->_jvmadj == mcall->tf()->domain()->cnt()), "" );
1129
1130  // Move the OopMap
1131  msfpt->_oop_map = sfpt->_oop_map;
1132
1133  // Registers killed by the call are set in the local scheduling pass
1134  // of Global Code Motion.
1135  return msfpt;
1136}
1137
1138//---------------------------match_tree----------------------------------------
1139// Match an Ideal Node DAG - turn it into a tree; Label & Reduce.  Used as part
1140// of the wholesale conversion from Ideal to Mach Nodes.  Also used for
1141// making GotoNodes while building the CFG and in init_spill_mask() to identify
1142// a Load's result RegMask for memoization in idealreg2regmask[].
1143MachNode *Matcher::match_tree( const Node *n ) {
1144  assert( n->Opcode() != Op_Phi, "cannot match" );
1145  assert( !n->is_block_start(), "cannot match" );
1146  // Set the mark for all locally allocated State objects.
1147  // When this call returns, the _states_arena arena will be reset
1148  // freeing all State objects.
1149  ResourceMark rm( &_states_arena );
1150
1151  LabelRootDepth = 0;
1152
1153  // StoreNodes require their Memory input to match any LoadNodes
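  // ((Node*)1 is used below as a sentinel meaning "no memory constraint".)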
1154  Node *mem = n->is_Store() ? n->in(MemNode::Memory) : (Node*)1 ;
1155
1156  // State object for root node of match tree
1157  // Allocate it on _states_arena - stack allocation can cause stack overflow.
1158  State *s = new (&_states_arena) State;
1159  s->_kids[0] = NULL;
1160  s->_kids[1] = NULL;
1161  s->_leaf = (Node*)n;
1162  // Label the input tree, allocating labels from top-level arena
1163  Label_Root( n, s, n->in(0), mem );
1164  if (C->failing())  return NULL;
1165
1166  // The minimum cost match for the whole tree is found at the root State
1167  uint mincost = max_juint;
1168  uint cost = max_juint;
1169  uint i;
1170  for( i = 0; i < NUM_OPERANDS; i++ ) {
1171    if( s->valid(i) &&                // valid entry and
1172        s->_cost[i] < cost &&         // low cost and
1173        s->_rule[i] >= NUM_OPERANDS ) // not an operand
1174      cost = s->_cost[mincost=i];
1175  }
1176  if (mincost == max_juint) {
1177#ifndef PRODUCT
1178    tty->print("No matching rule for:");
1179    s->dump();
1180#endif
1181    Matcher::soft_match_failure();
1182    return NULL;
1183  }
1184  // Reduce input tree based upon the state labels to machine Nodes
1185  MachNode *m = ReduceInst( s, s->_rule[mincost], mem );
1186#ifdef ASSERT
1187  _old2new_map.map(n->_idx, m);
1188#endif
1189
1190  // Add any Matcher-ignored edges
1191  uint cnt = n->req();
1192  uint start = 1;
1193  if( mem != (Node*)1 ) start = MemNode::Memory+1;
1194  if( n->Opcode() == Op_AddP ) {
1195    assert( mem == (Node*)1, "" );
1196    start = AddPNode::Base+1;
1197  }
1198  for( i = start; i < cnt; i++ ) {
1199    if( !n->match_edge(i) ) {
1200      if( i < m->req() )
1201        m->ins_req( i, n->in(i) );
1202      else
1203        m->add_req( n->in(i) );
1204    }
1205  }
1206
1207  return m;
1208}
1209
1210
1211//------------------------------match_into_reg---------------------------------
1212// Choose to either match this Node in a register or as part of the current
1213// match tree.  Return true for requiring a register and false for matching
1214// as part of the current match tree.
1215static bool match_into_reg( const Node *n, Node *m, Node *control, int i, bool shared ) {
1216
1217  const Type *t = m->bottom_type();
1218
1219  if( t->singleton() ) {
1220    // Never force constants into registers.  Allow them to match as
1221    // constants or registers.  Copies of the same value will share
1222    // the same register.  See find_shared_constant.
1223    return false;
1224  } else {                      // Not a constant
1225    // Stop recursion if they have different Controls.
1226    // Slot 0 of constants is not really a Control.
1227    if( control && m->in(0) && control != m->in(0) ) {
1228
1229      // Actually, we can live with the most conservative control we
1230      // find, if it post-dominates the others.  This allows us to
1231      // pick up load/op/store trees where the load can float a little
1232      // above the store.
1233      Node *x = control;
1234      const uint max_scan = 6;   // Arbitrary scan cutoff
1235      uint j;
1236      for( j=0; j<max_scan; j++ ) {
1237        if( x->is_Region() )    // Bail out at merge points
1238          return true;
1239        x = x->in(0);
1240        if( x == m->in(0) )     // Does 'control' post-dominate
1241          break;                // m->in(0)?  If so, we can use it
1242      }
1243      if( j == max_scan )       // No post-domination before scan end?
1244        return true;            // Then break the match tree up
1245    }
1246
1247    if (m->Opcode() == Op_DecodeN && m->outcnt() == 2) {
1248      // These are commonly used in address expressions and can
1249      // efficiently fold into them in some cases but because they are
1250      // consumed by AddP they commonly have two users.
1251      if (m->raw_out(0) == m->raw_out(1) && m->raw_out(0)->Opcode() == Op_AddP) return false;
1252    }
1253  }
1254
1255  // Not forcibly cloning.  If shared, put it into a register.
1256  return shared;
1257}
1258
1259
1260//------------------------------Instruction Selection--------------------------
1261// Label method walks a "tree" of nodes, using the ADLC generated DFA to match
1262// ideal nodes to machine instructions.  Trees are delimited by shared Nodes,
1263// things the Matcher does not match (e.g., Memory), and things with different
1264// Controls (hence forced into different blocks).  We pass in the Control
1265// selected for this entire State tree.
1266
1267// The Matcher works on Trees, but an Intel add-to-memory requires a DAG: the
1268// Store and the Load must have identical Memories (as well as identical
1269// pointers).  Since the Matcher does not have anything for Memory (and
1270// does not handle DAGs), I have to match the Memory input myself.  If the
1271// Tree root is a Store, I require all Loads to have the identical memory.
1272Node *Matcher::Label_Root( const Node *n, State *svec, Node *control, const Node *mem){
1273  // Since Label_Root is a recursive function, it's possible that we might run
1274  // out of stack space.  See bugs 6272980 & 6227033 for more info.
1275  LabelRootDepth++;
1276  if (LabelRootDepth > MaxLabelRootDepth) {
1277    C->record_method_not_compilable_all_tiers("Out of stack space, increase MaxLabelRootDepth");
1278    return NULL;
1279  }
1280  uint care = 0;                // Edges matcher cares about
1281  uint cnt = n->req();
1282  uint i = 0;
1283
1284  // Examine children for memory state
1285  // Can only subsume a child into your match-tree if that child's memory state
1286  // is not modified along the path to another input.
1287  // It is unsafe even if the other inputs are separate roots.
1288  Node *input_mem = NULL;
1289  for( i = 1; i < cnt; i++ ) {
1290    if( !n->match_edge(i) ) continue;
1291    Node *m = n->in(i);         // Get ith input
1292    assert( m, "expect non-null children" );
1293    if( m->is_Load() ) {
1294      if( input_mem == NULL ) {
1295        input_mem = m->in(MemNode::Memory);
1296      } else if( input_mem != m->in(MemNode::Memory) ) {
1297        input_mem = NodeSentinel;
1298      }
1299    }
1300  }
1301
1302  for( i = 1; i < cnt; i++ ){// For my children
1303    if( !n->match_edge(i) ) continue;
1304    Node *m = n->in(i);         // Get ith input
1305    // Allocate states out of a private arena
1306    State *s = new (&_states_arena) State;
1307    svec->_kids[care++] = s;
1308    assert( care <= 2, "binary only for now" );
1309
1310    // Recursively label the State tree.
1311    s->_kids[0] = NULL;
1312    s->_kids[1] = NULL;
1313    s->_leaf = m;
1314
1315    // Check for leaves of the State Tree; things that cannot be a part of
1316    // the current tree.  If it finds any, that value is matched as a
1317    // register operand.  If not, then the normal matching is used.
1318    if( match_into_reg(n, m, control, i, is_shared(m)) ||
1319        //
1320        // Stop recursion if this is LoadNode and the root of this tree is a
1321        // StoreNode and the load & store have different memories.
1322        ((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ||
1323        // Can NOT include the match of a subtree when its memory state
1324        // is used by any of the other subtrees
1325        (input_mem == NodeSentinel) ) {
1326#ifndef PRODUCT
1327      // Print when we exclude matching due to different memory states at input-loads
1328      if( PrintOpto && (Verbose && WizardMode) && (input_mem == NodeSentinel)
1329        && !((mem!=(Node*)1) && m->is_Load() && m->in(MemNode::Memory) != mem) ) {
1330        tty->print_cr("invalid input_mem");
1331      }
1332#endif
1333      // Switch to a register-only opcode; this value must be in a register
1334      // and cannot be subsumed as part of a larger instruction.
1335      s->DFA( m->ideal_reg(), m );
1336
1337    } else {
1338      // If match tree has no control and we do, adopt it for entire tree
1339      if( control == NULL && m->in(0) != NULL && m->req() > 1 )
1340        control = m->in(0);         // Pick up control
1341      // Else match as a normal part of the match tree.
1342      control = Label_Root(m,s,control,mem);
1343      if (C->failing()) return NULL;
1344    }
1345  }
1346
1347
1348  // Call DFA to match this node, and return
1349  svec->DFA( n->Opcode(), n );
1350
1351#ifdef ASSERT
1352  uint x;
1353  for( x = 0; x < _LAST_MACH_OPER; x++ )
1354    if( svec->valid(x) )
1355      break;
1356
1357  if (x >= _LAST_MACH_OPER) {
1358    n->dump();
1359    svec->dump();
1360    assert( false, "bad AD file" );
1361  }
1362#endif
1363  return control;
1364}
1365
1366
1367// Con nodes reduced using the same rule can share their MachNode
1368// which reduces the number of copies of a constant in the final
1369// program.  The register allocator is free to split uses later to
1370// split live ranges.
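// For example, two uses of the same ConI node that are each reduced by the
// same constant-load rule end up sharing one MachNode instead of
// materializing the constant twice.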
1371MachNode* Matcher::find_shared_constant(Node* leaf, uint rule) {
1372  if (!leaf->is_Con()) return NULL;
1373
1374  // See if this Con has already been reduced using this rule.
1375  if (_shared_constants.Size() <= leaf->_idx) return NULL;
1376  MachNode* last = (MachNode*)_shared_constants.at(leaf->_idx);
1377  if (last != NULL && rule == last->rule()) {
1378    // Get the new space root.
1379    Node* xroot = new_node(C->root());
1380    if (xroot == NULL) {
1381      // This shouldn't happen given the order of matching.
1382      return NULL;
1383    }
1384
1385    // Shared constants need to have their control be root so they
1386    // can be scheduled properly.
1387    Node* control = last->in(0);
1388    if (control != xroot) {
1389      if (control == NULL || control == C->root()) {
1390        last->set_req(0, xroot);
1391      } else {
1392        assert(false, "unexpected control");
1393        return NULL;
1394      }
1395    }
1396    return last;
1397  }
1398  return NULL;
1399}
1400
1401
1402//------------------------------ReduceInst-------------------------------------
1403// Reduce a State tree (with given Control) into a tree of MachNodes.
1404// This routine (and its cohort ReduceOper) converts Ideal Nodes into
1405// complicated machine Nodes.  Each MachNode covers some tree of Ideal Nodes.
1406// Each MachNode has a number of complicated MachOper operands; each
1407// MachOper also covers a further tree of Ideal Nodes.
1408
1409// The root of the Ideal match tree is always an instruction, so we enter
1410// the recursion here.  After building the MachNode, we need to recurse
1411// the tree checking for these cases:
1412// (1) Child is an instruction -
1413//     Build the instruction (recursively), add it as an edge.
1414//     Build a simple operand (register) to hold the result of the instruction.
1415// (2) Child is an interior part of an instruction -
1416//     Skip over it (do nothing)
1417// (3) Child is the start of an operand -
1418//     Build the operand, place it inside the instruction
1419//     Call ReduceOper.
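// A minimal sketch of case (3): matching (AddI (LoadI mem ptr) con) with an
// add-from-memory rule yields one MachNode whose memory operand covers the
// LoadI subtree and whose immediate operand covers the constant; only the
// true leaves (e.g. ptr) become inputs, plus the Memory edge inserted below.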
1420MachNode *Matcher::ReduceInst( State *s, int rule, Node *&mem ) {
1421  assert( rule >= NUM_OPERANDS, "called with operand rule" );
1422
1423  MachNode* shared_con = find_shared_constant(s->_leaf, rule);
1424  if (shared_con != NULL) {
1425    return shared_con;
1426  }
1427
1428  // Build the object to represent this state & prepare for recursive calls
1429  MachNode *mach = s->MachNodeGenerator( rule, C );
1430  mach->_opnds[0] = s->MachOperGenerator( _reduceOp[rule], C );
1431  assert( mach->_opnds[0] != NULL, "Missing result operand" );
1432  Node *leaf = s->_leaf;
1433  // Check for instruction or instruction chain rule
1434  if( rule >= _END_INST_CHAIN_RULE || rule < _BEGIN_INST_CHAIN_RULE ) {
1435    // Instruction
1436    mach->add_req( leaf->in(0) ); // Set initial control
1437    // Reduce interior of complex instruction
1438    ReduceInst_Interior( s, rule, mem, mach, 1 );
1439  } else {
1440    // Instruction chain rules are data-dependent on their inputs
1441    mach->add_req(0);             // Set initial control to none
1442    ReduceInst_Chain_Rule( s, rule, mem, mach );
1443  }
1444
1445  // If a Memory was used, insert a Memory edge
1446  if( mem != (Node*)1 )
1447    mach->ins_req(MemNode::Memory,mem);
1448
1449  // If the _leaf is an AddP, insert the base edge
1450  if( leaf->Opcode() == Op_AddP )
1451    mach->ins_req(AddPNode::Base,leaf->in(AddPNode::Base));
1452
1453  uint num_proj = _proj_list.size();
1454
1455  // Perform any 1-to-many expansions required
1456  MachNode *ex = mach->Expand(s,_proj_list);
1457  if( ex != mach ) {
1458    assert(ex->ideal_reg() == mach->ideal_reg(), "ideal types should match");
1459    if( ex->in(1)->is_Con() )
1460      ex->in(1)->set_req(0, C->root());
1461    // Remove old node from the graph
1462    for( uint i=0; i<mach->req(); i++ ) {
1463      mach->set_req(i,NULL);
1464    }
1465  }
1466
1467  // PhaseChaitin::fixup_spills will sometimes generate spill code
1468// via the matcher.  By this time, nodes have been wired into the CFG,
1469  // and any further nodes generated by expand rules will be left hanging
1470  // in space, and will not get emitted as output code.  Catch this.
1471  // Also, catch any new register allocation constraints ("projections")
1472  // generated belatedly during spill code generation.
1473  if (_allocation_started) {
1474    guarantee(ex == mach, "no expand rules during spill generation");
1475    guarantee(_proj_list.size() == num_proj, "no allocation during spill generation");
1476  }
1477
1478  if (leaf->is_Con()) {
1479    // Record the con for sharing
1480    _shared_constants.map(leaf->_idx, ex);
1481  }
1482
1483  return ex;
1484}
1485
1486void Matcher::ReduceInst_Chain_Rule( State *s, int rule, Node *&mem, MachNode *mach ) {
1487  // 'op' is what I am expecting to receive
1488  int op = _leftOp[rule];
1489  // Operand type to catch the child's result
1490  // This is what my child will give me.
1491  int opnd_class_instance = s->_rule[op];
1492  // Choose between operand class or not.
1493  // This is what I will receive.
1494  int catch_op = (FIRST_OPERAND_CLASS <= op && op < NUM_OPERANDS) ? opnd_class_instance : op;
1495  // New rule for child.  Chase operand classes to get the actual rule.
1496  int newrule = s->_rule[catch_op];
1497
1498  if( newrule < NUM_OPERANDS ) {
1499    // Chain from operand or operand class, may be output of shared node
1500    assert( 0 <= opnd_class_instance && opnd_class_instance < NUM_OPERANDS,
1501            "Bad AD file: Instruction chain rule must chain from operand");
1502    // Insert operand into array of operands for this instruction
1503    mach->_opnds[1] = s->MachOperGenerator( opnd_class_instance, C );
1504
1505    ReduceOper( s, newrule, mem, mach );
1506  } else {
1507    // Chain from the result of an instruction
1508    assert( newrule >= _LAST_MACH_OPER, "Do NOT chain from internal operand");
1509    mach->_opnds[1] = s->MachOperGenerator( _reduceOp[catch_op], C );
1510    Node *mem1 = (Node*)1;
1511    mach->add_req( ReduceInst(s, newrule, mem1) );
1512  }
1513  return;
1514}
1515
1516
1517uint Matcher::ReduceInst_Interior( State *s, int rule, Node *&mem, MachNode *mach, uint num_opnds ) {
1518  if( s->_leaf->is_Load() ) {
1519    Node *mem2 = s->_leaf->in(MemNode::Memory);
1520    assert( mem == (Node*)1 || mem == mem2, "multiple Memories being matched at once?" );
1521    mem = mem2;
1522  }
1523  if( s->_leaf->in(0) != NULL && s->_leaf->req() > 1) {
1524    if( mach->in(0) == NULL )
1525      mach->set_req(0, s->_leaf->in(0));
1526  }
1527
1528  // Now recursively walk the state tree & add operand list.
1529  for( uint i=0; i<2; i++ ) {   // binary tree
1530    State *newstate = s->_kids[i];
1531    if( newstate == NULL ) break;      // Might only have 1 child
1532    // 'op' is what I am expecting to receive
1533    int op;
1534    if( i == 0 ) {
1535      op = _leftOp[rule];
1536    } else {
1537      op = _rightOp[rule];
1538    }
1539    // Operand type to catch the child's result
1540    // This is what my child will give me.
1541    int opnd_class_instance = newstate->_rule[op];
1542    // Choose between operand class or not.
1543    // This is what I will receive.
1544    int catch_op = (op >= FIRST_OPERAND_CLASS && op < NUM_OPERANDS) ? opnd_class_instance : op;
1545    // New rule for child.  Chase operand classes to get the actual rule.
1546    int newrule = newstate->_rule[catch_op];
1547
1548    if( newrule < NUM_OPERANDS ) { // Operand/operandClass or internalOp/instruction?
1549      // Operand/operandClass
1550      // Insert operand into array of operands for this instruction
1551      mach->_opnds[num_opnds++] = newstate->MachOperGenerator( opnd_class_instance, C );
1552      ReduceOper( newstate, newrule, mem, mach );
1553
1554    } else {                    // Child is internal operand or new instruction
1555      if( newrule < _LAST_MACH_OPER ) { // internal operand or instruction?
1556        // internal operand --> call ReduceInst_Interior
1557        // Interior of complex instruction.  Do nothing but recurse.
1558        num_opnds = ReduceInst_Interior( newstate, newrule, mem, mach, num_opnds );
1559      } else {
1560        // instruction --> call build operand(  ) to catch result
1561        //             --> ReduceInst( newrule )
1562        mach->_opnds[num_opnds++] = s->MachOperGenerator( _reduceOp[catch_op], C );
1563        Node *mem1 = (Node*)1;
1564        mach->add_req( ReduceInst( newstate, newrule, mem1 ) );
1565      }
1566    }
1567    assert( mach->_opnds[num_opnds-1], "" );
1568  }
1569  return num_opnds;
1570}
1571
1572// This routine walks the interior of possible complex operands.
1573// At each point we check our children in the match tree:
1574// (1) No children -
1575//     We are a leaf; add _leaf field as an input to the MachNode
1576// (2) Child is an internal operand -
1577//     Skip over it (do nothing)
1578// (3) Child is an instruction -
1579//     Call ReduceInst recursively and add the resulting
1580//     instruction as an input to the MachNode
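// For example, a complex memory operand covering an (AddP base con) subtree is
// walked here: the interior AddP contributes no edge, leaves whose rule is
// marked _swallowed (typically constants encoded directly in the operand) add
// no edge, and the remaining leaves become inputs of the MachNode.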
1581void Matcher::ReduceOper( State *s, int rule, Node *&mem, MachNode *mach ) {
1582  assert( rule < _LAST_MACH_OPER, "called with operand rule" );
1583  State *kid = s->_kids[0];
1584  assert( kid == NULL || s->_leaf->in(0) == NULL, "internal operands have no control" );
1585
1586  // Leaf?  And not subsumed?
1587  if( kid == NULL && !_swallowed[rule] ) {
1588    mach->add_req( s->_leaf );  // Add leaf pointer
1589    return;                     // Bail out
1590  }
1591
1592  if( s->_leaf->is_Load() ) {
1593    assert( mem == (Node*)1, "multiple Memories being matched at once?" );
1594    mem = s->_leaf->in(MemNode::Memory);
1595  }
1596  if( s->_leaf->in(0) && s->_leaf->req() > 1) {
1597    if( !mach->in(0) )
1598      mach->set_req(0,s->_leaf->in(0));
1599    else {
1600      assert( s->_leaf->in(0) == mach->in(0), "same instruction, differing controls?" );
1601    }
1602  }
1603
1604  for( uint i=0; kid != NULL && i<2; kid = s->_kids[1], i++ ) {   // binary tree
1605    int newrule;
1606    if( i == 0 )
1607      newrule = kid->_rule[_leftOp[rule]];
1608    else
1609      newrule = kid->_rule[_rightOp[rule]];
1610
1611    if( newrule < _LAST_MACH_OPER ) { // Operand or instruction?
1612      // Internal operand; recurse but do nothing else
1613      ReduceOper( kid, newrule, mem, mach );
1614
1615    } else {                    // Child is a new instruction
1616      // Reduce the instruction, and add a direct pointer from this
1617      // machine instruction to the newly reduced one.
1618      Node *mem1 = (Node*)1;
1619      mach->add_req( ReduceInst( kid, newrule, mem1 ) );
1620    }
1621  }
1622}
1623
1624
1625// -------------------------------------------------------------------------
1626// Java-Java calling convention
1627// (what you use when Java calls Java)
1628
1629//------------------------------find_receiver----------------------------------
1630// For a given signature, return the OptoReg for parameter 0.
1631OptoReg::Name Matcher::find_receiver( bool is_outgoing ) {
1632  VMRegPair regs;
1633  BasicType sig_bt = T_OBJECT;
1634  calling_convention(&sig_bt, &regs, 1, is_outgoing);
1635  // Return argument 0 register.  In the LP64 build pointers
1636  // take 2 registers, but the VM wants only the 'main' name.
1637  return OptoReg::as_OptoReg(regs.first());
1638}
1639
1640// A method-klass-holder may be passed in the inline_cache_reg
1641// and then expanded into the inline_cache_reg and a method_oop register
1642// (the expansion is defined in ad_<arch>.cpp).
1643
1644
1645//------------------------------find_shared------------------------------------
1646// Set bits if Node is shared or otherwise a root
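// The walk is iterative, driven by an explicit MStack of (node, state) pairs;
// each node passes through Pre_Visit, Visit and Post_Visit (or Alt_Post_Visit
// for If/CountedLoopEnd), so the traversal does not recurse on the native stack.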
1647void Matcher::find_shared( Node *n ) {
1648  // Allocate stack of size C->unique() * 2 to avoid frequent realloc
1649  MStack mstack(C->unique() * 2);
1650  mstack.push(n, Visit);     // Don't need to pre-visit root node
1651  while (mstack.is_nonempty()) {
1652    n = mstack.node();       // Leave node on stack
1653    Node_State nstate = mstack.state();
1654    if (nstate == Pre_Visit) {
1655      if (is_visited(n)) {   // Visited already?
1656        // Node is shared and has no reason to clone.  Flag it as shared.
1657        // This causes it to match into a register for the sharing.
1658        set_shared(n);       // Flag as shared and
1659        mstack.pop();        // remove node from stack
1660        continue;
1661      }
1662      nstate = Visit; // Not already visited; so visit now
1663    }
1664    if (nstate == Visit) {
1665      mstack.set_state(Post_Visit);
1666      set_visited(n);   // Flag as visited now
1667      bool mem_op = false;
1668
1669      switch( n->Opcode() ) {  // Handle some opcodes special
1670      case Op_Phi:             // Treat Phis as shared roots
1671      case Op_Parm:
1672      case Op_Proj:            // All handled specially during matching
1673      case Op_SafePointScalarObject:
1674        set_shared(n);
1675        set_dontcare(n);
1676        break;
1677      case Op_If:
1678      case Op_CountedLoopEnd:
1679        mstack.set_state(Alt_Post_Visit); // Alternative way
1680        // Convert (If (Bool (CmpX A B))) into (If (Bool) (CmpX A B)).  Helps
1681        // with matching cmp/branch in 1 instruction.  The Matcher needs the
1682        // Bool and CmpX side-by-side, because it can only get at constants
1683        // that are at the leaves of Match trees, and the Bool's condition acts
1684        // as a constant here.
1685        mstack.push(n->in(1), Visit);         // Clone the Bool
1686        mstack.push(n->in(0), Pre_Visit);     // Visit control input
1687        continue; // while (mstack.is_nonempty())
1688      case Op_ConvI2D:         // These forms efficiently match with a prior
1689      case Op_ConvI2F:         //   Load but not a following Store
1690        if( n->in(1)->is_Load() &&        // Prior load
1691            n->outcnt() == 1 &&           // Not already shared
1692            n->unique_out()->is_Store() ) // Following store
1693          set_shared(n);       // Force it to be a root
1694        break;
1695      case Op_ReverseBytesI:
1696      case Op_ReverseBytesL:
1697        if( n->in(1)->is_Load() &&        // Prior load
1698            n->outcnt() == 1 )            // Not already shared
1699          set_shared(n);                  // Force it to be a root
1700        break;
1701      case Op_BoxLock:         // Can't match until we get stack-regs in ADLC
1702      case Op_IfFalse:
1703      case Op_IfTrue:
1704      case Op_MachProj:
1705      case Op_MergeMem:
1706      case Op_Catch:
1707      case Op_CatchProj:
1708      case Op_CProj:
1709      case Op_JumpProj:
1710      case Op_JProj:
1711      case Op_NeverBranch:
1712        set_dontcare(n);
1713        break;
1714      case Op_Jump:
1715        mstack.push(n->in(1), Visit);         // Switch Value
1716        mstack.push(n->in(0), Pre_Visit);     // Visit Control input
1717        continue;                             // while (mstack.is_nonempty())
1718      case Op_StrComp:
1719        set_shared(n); // Force result into register (it will be anyway)
1720        break;
1721      case Op_ConP: {  // Convert pointers above the centerline to NULL
1722        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
1723        const TypePtr* tp = tn->type()->is_ptr();
1724        if (tp->_ptr == TypePtr::AnyNull) {
1725          tn->set_type(TypePtr::NULL_PTR);
1726        }
1727        break;
1728      }
1729      case Op_ConN: {  // Convert narrow pointers above the centerline to NULL
1730        TypeNode *tn = n->as_Type(); // Constants derive from type nodes
1731        const TypePtr* tp = tn->type()->is_narrowoop()->make_oopptr();
1732        if (tp->_ptr == TypePtr::AnyNull) {
1733          tn->set_type(TypeNarrowOop::NULL_PTR);
1734        }
1735        break;
1736      }
1737      case Op_Binary:         // These are introduced in the Post_Visit state.
1738        ShouldNotReachHere();
1739        break;
1740      case Op_StoreB:         // Do match these, despite no ideal reg
1741      case Op_StoreC:
1742      case Op_StoreCM:
1743      case Op_StoreD:
1744      case Op_StoreF:
1745      case Op_StoreI:
1746      case Op_StoreL:
1747      case Op_StoreP:
1748      case Op_StoreN:
1749      case Op_Store16B:
1750      case Op_Store8B:
1751      case Op_Store4B:
1752      case Op_Store8C:
1753      case Op_Store4C:
1754      case Op_Store2C:
1755      case Op_Store4I:
1756      case Op_Store2I:
1757      case Op_Store2L:
1758      case Op_Store4F:
1759      case Op_Store2F:
1760      case Op_Store2D:
1761      case Op_ClearArray:
1762      case Op_SafePoint:
1763        mem_op = true;
1764        break;
1765      case Op_LoadB:
1766      case Op_LoadC:
1767      case Op_LoadD:
1768      case Op_LoadF:
1769      case Op_LoadI:
1770      case Op_LoadKlass:
1771      case Op_LoadNKlass:
1772      case Op_LoadL:
1773      case Op_LoadS:
1774      case Op_LoadP:
1775      case Op_LoadN:
1776      case Op_LoadRange:
1777      case Op_LoadD_unaligned:
1778      case Op_LoadL_unaligned:
1779      case Op_Load16B:
1780      case Op_Load8B:
1781      case Op_Load4B:
1782      case Op_Load4C:
1783      case Op_Load2C:
1784      case Op_Load8C:
1785      case Op_Load8S:
1786      case Op_Load4S:
1787      case Op_Load2S:
1788      case Op_Load4I:
1789      case Op_Load2I:
1790      case Op_Load2L:
1791      case Op_Load4F:
1792      case Op_Load2F:
1793      case Op_Load2D:
1794        mem_op = true;
1795        // Must be root of match tree due to prior load conflict
1796        if( C->subsume_loads() == false ) {
1797          set_shared(n);
1798        }
1799        // Fall into default case
1800      default:
1801        if( !n->ideal_reg() )
1802          set_dontcare(n);  // Unmatchable Nodes
1803      } // end_switch
1804
1805      for(int i = n->req() - 1; i >= 0; --i) { // For my children
1806        Node *m = n->in(i); // Get ith input
1807        if (m == NULL) continue;  // Ignore NULLs
1808        uint mop = m->Opcode();
1809
1810        // Must clone all producers of flags, or we will not match correctly.
1811        // Suppose a compare setting int-flags is shared (e.g., a switch-tree)
1812        // then it will match into an ideal Op_RegFlags.  Alas, the fp-flags
1813        // are also there, so we may match a float-branch to int-flags and
1814        // expect the allocator to haul the flags from the int-side to the
1815        // fp-side.  No can do.
1816        if( _must_clone[mop] ) {
1817          mstack.push(m, Visit);
1818          continue; // for(int i = ...)
1819        }
1820
1821        // Clone addressing expressions as they are "free" in most instructions
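        // Roughly, the two address shapes handled below are:
        //   (AddP base addr con)                                   base + offset
        //   (AddP base (AddP base ptr (LShiftX idx scale)) con)    base + (idx << scale) + offset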
1822        if( mem_op && i == MemNode::Address && mop == Op_AddP ) {
1823          Node *off = m->in(AddPNode::Offset);
1824          if( off->is_Con() ) {
1825            set_visited(m);  // Flag as visited now
1826            Node *adr = m->in(AddPNode::Address);
1827
1828            // Intel, ARM and friends can handle 2 adds in addressing mode
1829            if( clone_shift_expressions && adr->Opcode() == Op_AddP &&
1830                // AtomicAdd is not an addressing expression.
1831                // Cheap to find it by looking for screwy base.
1832                !adr->in(AddPNode::Base)->is_top() ) {
1833              set_visited(adr);  // Flag as visited now
1834              Node *shift = adr->in(AddPNode::Offset);
1835              // Check for shift by small constant as well
1836              if( shift->Opcode() == Op_LShiftX && shift->in(2)->is_Con() &&
1837                  shift->in(2)->get_int() <= 3 ) {
1838                set_visited(shift);  // Flag as visited now
1839                mstack.push(shift->in(2), Visit);
1840#ifdef _LP64
1841                // Allow the Matcher to match the rule which bypasses
1842                // the ConvI2L operation for an array index on LP64
1843                // if the index value is positive.
1844                if( shift->in(1)->Opcode() == Op_ConvI2L &&
1845                    shift->in(1)->as_Type()->type()->is_long()->_lo >= 0 ) {
1846                  set_visited(shift->in(1));  // Flag as visited now
1847                  mstack.push(shift->in(1)->in(1), Pre_Visit);
1848                } else
1849#endif
1850                mstack.push(shift->in(1), Pre_Visit);
1851              } else {
1852                mstack.push(shift, Pre_Visit);
1853              }
1854              mstack.push(adr->in(AddPNode::Address), Pre_Visit);
1855              mstack.push(adr->in(AddPNode::Base), Pre_Visit);
1856            } else {  // Sparc, Alpha, PPC and friends
1857              mstack.push(adr, Pre_Visit);
1858            }
1859
1860            // Clone X+offset as it also folds into most addressing expressions
1861            mstack.push(off, Visit);
1862            mstack.push(m->in(AddPNode::Base), Pre_Visit);
1863            continue; // for(int i = ...)
1864          } // if( off->is_Con() )
1865        }   // if( mem_op &&
1866        mstack.push(m, Pre_Visit);
1867      }     // for(int i = ...)
1868    }
1869    else if (nstate == Alt_Post_Visit) {
1870      mstack.pop(); // Remove node from stack
1871      // We cannot remove the Cmp input from the Bool here, as the Bool may be
1872      // shared and all users of the Bool need to move the Cmp in parallel.
1873      // This leaves both the Bool and the If pointing at the Cmp.  To
1874      // prevent the Matcher from trying to Match the Cmp along both paths
1875      // BoolNode::match_edge always returns a zero.
1876
1877      // We reorder the Op_If in a pre-order manner, so we can visit without
1878      // accidentally sharing the Cmp (the Bool and the If make 2 users).
1879      n->add_req( n->in(1)->in(1) ); // Add the Cmp next to the Bool
1880    }
1881    else if (nstate == Post_Visit) {
1882      mstack.pop(); // Remove node from stack
1883
1884      // Now hack a few special opcodes
1885      switch( n->Opcode() ) {       // Handle some opcodes special
1886      case Op_StorePConditional:
1887      case Op_StoreLConditional:
1888      case Op_CompareAndSwapI:
1889      case Op_CompareAndSwapL:
1890      case Op_CompareAndSwapP:
1891      case Op_CompareAndSwapN: {   // Convert trinary to binary-tree
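        // The matcher's State tree is binary, so wrap (oldval, newval) in a
        // BinaryNode on the ValueIn edge and drop the separate ExpectedIn edge.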
1892        Node *newval = n->in(MemNode::ValueIn );
1893        Node *oldval  = n->in(LoadStoreNode::ExpectedIn);
1894        Node *pair = new (C, 3) BinaryNode( oldval, newval );
1895        n->set_req(MemNode::ValueIn,pair);
1896        n->del_req(LoadStoreNode::ExpectedIn);
1897        break;
1898      }
1899      case Op_CMoveD:              // Convert trinary to binary-tree
1900      case Op_CMoveF:
1901      case Op_CMoveI:
1902      case Op_CMoveL:
1903      case Op_CMoveN:
1904      case Op_CMoveP: {
1905        // Restructure into a binary tree for Matching.  It's possible that
1906        // we could move this code up next to the graph reshaping for IfNodes
1907        // or vice-versa, but I do not want to debug this for Ladybird.
1908        // 10/2/2000 CNC.
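        // Roughly: (CMove bol v1 v2) becomes (CMove (Binary bol cmp) (Binary v1 v2)),
        // where cmp is the compare feeding the Bool.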
1909        Node *pair1 = new (C, 3) BinaryNode(n->in(1),n->in(1)->in(1));
1910        n->set_req(1,pair1);
1911        Node *pair2 = new (C, 3) BinaryNode(n->in(2),n->in(3));
1912        n->set_req(2,pair2);
1913        n->del_req(3);
1914        break;
1915      }
1916      default:
1917        break;
1918      }
1919    }
1920    else {
1921      ShouldNotReachHere();
1922    }
1923  } // end of while (mstack.is_nonempty())
1924}
1925
1926#ifdef ASSERT
1927// machine-independent root to machine-dependent root
1928void Matcher::dump_old2new_map() {
1929  _old2new_map.dump();
1930}
1931#endif
1932
1933//---------------------------collect_null_checks-------------------------------
1934// Find null checks in the ideal graph; write a machine-specific node for
1935// it.  Used by later implicit-null-check handling.  Actually collects
1936// either an IfTrue or IfFalse for the common NOT-null path, AND the ideal
1937// value being tested.
1938void Matcher::collect_null_checks( Node *proj ) {
1939  Node *iff = proj->in(0);
1940  if( iff->Opcode() == Op_If ) {
1941    // During matching If's have Bool & Cmp side-by-side
1942    BoolNode *b = iff->in(1)->as_Bool();
1943    Node *cmp = iff->in(2);
1944    int opc = cmp->Opcode();
1945    if (opc != Op_CmpP && opc != Op_CmpN) return;
1946
1947    const Type* ct = cmp->in(2)->bottom_type();
1948    if (ct == TypePtr::NULL_PTR ||
1949        (opc == Op_CmpN && ct == TypeNarrowOop::NULL_PTR)) {
1950
1951      if( proj->Opcode() == Op_IfTrue ) {
1952        extern int all_null_checks_found;
1953        all_null_checks_found++;
1954        if( b->_test._test == BoolTest::ne ) {
1955          _null_check_tests.push(proj);
1956          _null_check_tests.push(cmp->in(1));
1957        }
1958      } else {
1959        assert( proj->Opcode() == Op_IfFalse, "" );
1960        if( b->_test._test == BoolTest::eq ) {
1961          _null_check_tests.push(proj);
1962          _null_check_tests.push(cmp->in(1));
1963        }
1964      }
1965    }
1966  }
1967}
1968
1969//---------------------------validate_null_checks------------------------------
1970// It's possible that the value being NULL checked is not the root of a match
1971// tree.  If so, I cannot use the value in an implicit null check.
1972void Matcher::validate_null_checks( ) {
1973  uint cnt = _null_check_tests.size();
1974  for( uint i=0; i < cnt; i+=2 ) {
1975    Node *test = _null_check_tests[i];
1976    Node *val = _null_check_tests[i+1];
1977    if (has_new_node(val)) {
1978      // Is a match-tree root, so replace with the matched value
1979      _null_check_tests.map(i+1, new_node(val));
1980    } else {
1981      // Yank from candidate list
1982      _null_check_tests.map(i+1,_null_check_tests[--cnt]);
1983      _null_check_tests.map(i,_null_check_tests[--cnt]);
1984      _null_check_tests.pop();
1985      _null_check_tests.pop();
1986      i-=2;
1987    }
1988  }
1989}
1990
1991
1992// Used by the DFA in dfa_sparc.cpp.  Check for a prior FastLock
1993// acting as an Acquire and thus we don't need an Acquire here.  We
1994// retain the Node to act as a compiler ordering barrier.
1995bool Matcher::prior_fast_lock( const Node *acq ) {
1996  Node *r = acq->in(0);
1997  if( !r->is_Region() || r->req() <= 1 ) return false;
1998  Node *proj = r->in(1);
1999  if( !proj->is_Proj() ) return false;
2000  Node *call = proj->in(0);
2001  if( !call->is_Call() || call->as_Call()->entry_point() != OptoRuntime::complete_monitor_locking_Java() )
2002    return false;
2003
2004  return true;
2005}
2006
2007// Used by the DFA in dfa_sparc.cpp.  Check for a following FastUnlock
2008// acting as a Release and thus we don't need a Release here.  We
2009// retain the Node to act as a compiler ordering barrier.
2010bool Matcher::post_fast_unlock( const Node *rel ) {
2011  Compile *C = Compile::current();
2012  assert( rel->Opcode() == Op_MemBarRelease, "" );
2013  const MemBarReleaseNode *mem = (const MemBarReleaseNode*)rel;
2014  DUIterator_Fast imax, i = mem->fast_outs(imax);
2015  Node *ctrl = NULL;
2016  while( true ) {
2017    ctrl = mem->fast_out(i);            // Throw out-of-bounds if proj not found
2018    assert( ctrl->is_Proj(), "only projections here" );
2019    ProjNode *proj = (ProjNode*)ctrl;
2020    if( proj->_con == TypeFunc::Control &&
2021        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
2022      break;
2023    i++;
2024  }
2025  Node *iff = NULL;
2026  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
2027    Node *x = ctrl->fast_out(j);
2028    if( x->is_If() && x->req() > 1 &&
2029        !C->node_arena()->contains(x) ) { // Unmatched old-space only
2030      iff = x;
2031      break;
2032    }
2033  }
2034  if( !iff ) return false;
2035  Node *bol = iff->in(1);
2036  // The iff might be some random subclass of If or bol might be Con-Top
2037  if (!bol->is_Bool())  return false;
2038  assert( bol->req() > 1, "" );
2039  return (bol->in(1)->Opcode() == Op_FastUnlock);
2040}
2041
2042// Used by the DFA in dfa_xxx.cpp.  Check for a following barrier or
2043// atomic instruction acting as a store_load barrier without any
2044// intervening volatile load, and thus we don't need a barrier here.
2045// We retain the Node to act as a compiler ordering barrier.
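// The scan starts at the membar's control projection and walks its users: a
// later barrier, lock or atomic means this barrier is redundant (return true),
// while an upcoming MemBarAcquire, call or safepoint means it must be kept.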
2046bool Matcher::post_store_load_barrier(const Node *vmb) {
2047  Compile *C = Compile::current();
2048  assert( vmb->is_MemBar(), "" );
2049  assert( vmb->Opcode() != Op_MemBarAcquire, "" );
2050  const MemBarNode *mem = (const MemBarNode*)vmb;
2051
2052  // Get the Proj node, ctrl, that can be used to iterate forward
2053  Node *ctrl = NULL;
2054  DUIterator_Fast imax, i = mem->fast_outs(imax);
2055  while( true ) {
2056    ctrl = mem->fast_out(i);            // Throw out-of-bounds if proj not found
2057    assert( ctrl->is_Proj(), "only projections here" );
2058    ProjNode *proj = (ProjNode*)ctrl;
2059    if( proj->_con == TypeFunc::Control &&
2060        !C->node_arena()->contains(ctrl) ) // Unmatched old-space only
2061      break;
2062    i++;
2063  }
2064
2065  for( DUIterator_Fast jmax, j = ctrl->fast_outs(jmax); j < jmax; j++ ) {
2066    Node *x = ctrl->fast_out(j);
2067    int xop = x->Opcode();
2068
2069    // We don't need current barrier if we see another or a lock
2070    // before seeing volatile load.
2071    //
2072    // Op_FastUnlock previously appeared in the Op_* list below.
2073    // With the advent of 1-0 lock operations we're no longer guaranteed
2074    // that a monitor exit operation contains a serializing instruction.
2075
2076    if (xop == Op_MemBarVolatile ||
2077        xop == Op_FastLock ||
2078        xop == Op_CompareAndSwapL ||
2079        xop == Op_CompareAndSwapP ||
2080        xop == Op_CompareAndSwapN ||
2081        xop == Op_CompareAndSwapI)
2082      return true;
2083
2084    if (x->is_MemBar()) {
2085      // We must retain this membar if there is an upcoming volatile
2086      // load, which will be preceded by acquire membar.
2087      if (xop == Op_MemBarAcquire)
2088        return false;
2089      // For other kinds of barriers, check by pretending we
2090      // are them, and seeing if we can be removed.
2091      else
2092        return post_store_load_barrier((const MemBarNode*)x);
2093    }
2094
2095    // Delicate code to detect case of an upcoming fastlock block
2096    if( x->is_If() && x->req() > 1 &&
2097        !C->node_arena()->contains(x) ) { // Unmatched old-space only
2098      Node *iff = x;
2099      Node *bol = iff->in(1);
2100      // The iff might be some random subclass of If or bol might be Con-Top
2101      if (!bol->is_Bool())  return false;
2102      assert( bol->req() > 1, "" );
2103      return (bol->in(1)->Opcode() == Op_FastUnlock);
2104    }
2105    // probably not necessary to check for these
2106    if (x->is_Call() || x->is_SafePoint() || x->is_block_proj())
2107      return false;
2108  }
2109  return false;
2110}
2111
2112//=============================================================================
2113//---------------------------State---------------------------------------------
2114State::State(void) {
2115#ifdef ASSERT
2116  _id = 0;
2117  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
2118  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
2119  //memset(_cost, -1, sizeof(_cost));
2120  //memset(_rule, -1, sizeof(_rule));
2121#endif
2122  memset(_valid, 0, sizeof(_valid));
2123}
2124
2125#ifdef ASSERT
2126State::~State() {
2127  _id = 99;
2128  _kids[0] = _kids[1] = (State*)(intptr_t) CONST64(0xcafebabecafebabe);
2129  _leaf = (Node*)(intptr_t) CONST64(0xbaadf00dbaadf00d);
2130  memset(_cost, -3, sizeof(_cost));
2131  memset(_rule, -3, sizeof(_rule));
2132}
2133#endif
2134
2135#ifndef PRODUCT
2136//---------------------------dump----------------------------------------------
2137void State::dump() {
2138  tty->print("\n");
2139  dump(0);
2140}
2141
2142void State::dump(int depth) {
2143  for( int j = 0; j < depth; j++ )
2144    tty->print("   ");
2145  tty->print("--N: ");
2146  _leaf->dump();
2147  uint i;
2148  for( i = 0; i < _LAST_MACH_OPER; i++ )
2149    // Check for valid entry
2150    if( valid(i) ) {
2151      for( int j = 0; j < depth; j++ )
2152        tty->print("   ");
2153      assert(_cost[i] != max_juint, "cost must be a valid value");
2154      assert(_rule[i] < _last_Mach_Node, "rule[i] must be valid rule");
2155      tty->print_cr("%s  %d  %s",
2156                    ruleName[i], _cost[i], ruleName[_rule[i]] );
2157    }
2158  tty->print_cr("");
2159
2160  for( i=0; i<2; i++ )
2161    if( _kids[i] )
2162      _kids[i]->dump(depth+1);
2163}
2164#endif
2165