x86_32.ad revision 13370:731370f39fcd
1//
2// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
// architecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// Previously set EBX, ESI, and EDI as save-on-entry for java code
64// Turn off SOE in java-code due to frequent use of uncommon-traps.
65// Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
77// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
78// allocator, and only shows up in the encodings.
79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Here is the trick: FPR1 is really st(0), except in the midst of emitting
// the assembly for a MachNode.  During emission the FPU stack is pushed,
// temporarily making FPR1 == st(1).  At any safepoint, however, the stack
// does not hold this extra element, so FPR1 == st(0) from the oopMap
// viewpoint.  This same numbering quirk forces the instruction encodings
// to adjust the register encoding to correct for the 0/1 difference.  See
// MachSpillCopyNode::implementation, where it does flt->flt moves, for an
// example.
89//
90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104//
105// Empty fill registers, which are never used, but supply alignment to xmm regs
106//
107reg_def FILL0( SOC, SOC, Op_RegF, 8, VMRegImpl::Bad());
108reg_def FILL1( SOC, SOC, Op_RegF, 9, VMRegImpl::Bad());
109reg_def FILL2( SOC, SOC, Op_RegF, 10, VMRegImpl::Bad());
110reg_def FILL3( SOC, SOC, Op_RegF, 11, VMRegImpl::Bad());
111reg_def FILL4( SOC, SOC, Op_RegF, 12, VMRegImpl::Bad());
112reg_def FILL5( SOC, SOC, Op_RegF, 13, VMRegImpl::Bad());
113reg_def FILL6( SOC, SOC, Op_RegF, 14, VMRegImpl::Bad());
114reg_def FILL7( SOC, SOC, Op_RegF, 15, VMRegImpl::Bad());
115
116// Specify priority of register selection within phases of register
117// allocation.  Highest priority is first.  A useful heuristic is to
118// give registers a low priority when they are required by machine
119// instructions, like EAX and EDX.  Registers which are used as
120// pairs must fall on an even boundary (witness the FPR#L's in this list).
121// For the Intel integer registers, the equivalent Long pairs are
122// EDX:EAX, EBX:ECX, and EDI:EBP.
123alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126                    FPR6L, FPR6H, FPR7L, FPR7H,
127                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130//----------Architecture Description Register Classes--------------------------
131// Several register classes are automatically defined based upon information in
132// this architecture description.
133// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
134// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
137//
138// Class for no registers (empty set).
139reg_class no_reg();
140
141// Class for all registers
142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
143// Class for all registers (excluding EBP)
144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
// Dynamic register class that selects at runtime between register classes
// any_reg_no_ebp and any_reg_with_ebp (depending on the value of the flag PreserveFramePointer).
// Equivalent to: return PreserveFramePointer ? any_reg_no_ebp : any_reg_with_ebp;
148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
149
150// Class for general registers
151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
152// Class for general registers (excluding EBP).
153// This register class can be used for implicit null checks on win95.
154// It is also safe for use by tailjumps (we don't want to allocate in ebp).
155// Used also if the PreserveFramePointer flag is true.
156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
// Dynamic register class that selects between int_reg_no_ebp and int_reg_with_ebp.
158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
159
160// Class of "X" registers
161reg_class int_x_reg(EBX, ECX, EDX, EAX);
162
163// Class of registers that can appear in an address with no offset.
164// EBP and ESP require an extra instruction byte for zero offset.
165// Used in fast-unlock
166reg_class p_reg(EDX, EDI, ESI, EBX);
167
168// Class for general registers excluding ECX
169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
170// Class for general registers excluding ECX (and EBP)
171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
// Dynamic register class that selects between ncx_reg_no_ebp and ncx_reg_with_ebp.
173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
174
175// Class for general registers excluding EAX
176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
177
178// Class for general registers excluding EAX and EBX.
179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
180// Class for general registers excluding EAX and EBX (and EBP)
181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
// Dynamic register class that selects between nabx_reg_no_ebp and nabx_reg_with_ebp.
183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
184
185// Class of EAX (for multiply and divide operations)
186reg_class eax_reg(EAX);
187
188// Class of EBX (for atomic add)
189reg_class ebx_reg(EBX);
190
191// Class of ECX (for shift and JCXZ operations and cmpLTMask)
192reg_class ecx_reg(ECX);
193
194// Class of EDX (for multiply and divide operations)
195reg_class edx_reg(EDX);
196
197// Class of EDI (for synchronization)
198reg_class edi_reg(EDI);
199
200// Class of ESI (for synchronization)
201reg_class esi_reg(ESI);
202
203// Singleton class for stack pointer
204reg_class sp_reg(ESP);
205
206// Singleton class for instruction pointer
207// reg_class ip_reg(EIP);
208
209// Class of integer register pairs
210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
211// Class of integer register pairs (excluding EBP and EDI);
212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
// Dynamic register class that selects between long_reg_no_ebp and long_reg_with_ebp.
214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
215
216// Class of integer register pairs that aligns with calling convention
217reg_class eadx_reg( EAX,EDX );
218reg_class ebcx_reg( ECX,EBX );
219
220// Not AX or DX, used in divides
221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
222// Not AX or DX (and neither EBP), used in divides
223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
// Dynamic register class that selects between nadx_reg_no_ebp and nadx_reg_with_ebp.
225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
226
227// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is never allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
231
232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
233                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234                      FPR7L,FPR7H );
235
236reg_class fp_flt_reg0( FPR1L );
237reg_class fp_dbl_reg0( FPR1L,FPR1H );
238reg_class fp_dbl_reg1( FPR2L,FPR2H );
239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
240                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
241
242%}
243
244
245//----------SOURCE BLOCK-------------------------------------------------------
246// This is a block of C++ code which provides values, functions, and
247// definitions necessary in the rest of the architecture description
248source_hpp %{
249// Must be visible to the DFA in dfa_x86_32.cpp
250extern bool is_operand_hi32_zero(Node* n);
251%}
252
253source %{
254#define   RELOC_IMM32    Assembler::imm_operand
255#define   RELOC_DISP32   Assembler::disp32_operand
256
257#define __ _masm.
258
259// How to find the high register of a Long pair, given the low register
260#define   HIGH_FROM_LOW(x) ((x)+2)
261
262// These masks are used to provide 128-bit aligned bitmasks to the XMM
263// instructions, to allow sign-masking or sign-bit flipping.  They allow
264// fast versions of NegF/NegD and AbsF/AbsD.
265
266// Note: 'double' and 'long long' have 32-bits alignment on x86.
267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
268  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
269  // of 128-bits operands for SSE instructions.
270  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
271  // Store the value to a 128-bits operand.
272  operand[0] = lo;
273  operand[1] = hi;
274  return operand;
275}
276
// Buffer for 128-bits masks used by SSE instructions.
// One extra 128-bit slot is reserved because double_quadword() rounds every
// address it is given down to a 16-byte boundary before storing the mask.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// Each pointer below refers to a 16-byte aligned {lo, hi} pair stored inside
// fp_signmask_pool.  The 0x7FFF... patterns have only the sign bit(s)
// cleared (sign masks); the 0x8000... patterns have only the sign bit(s)
// set (sign flips) -- one sign bit per 32-bit lane for the float variants
// and per 64-bit lane for the double variants.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
285
286// Offset hacking within calls.
287static int pre_call_resets_size() {
288  int size = 0;
289  Compile* C = Compile::current();
290  if (C->in_24_bit_fp_mode()) {
291    size += 6; // fldcw
292  }
293  if (VM_Version::supports_vzeroupper()) {
294    size += 3; // vzeroupper
295  }
296  return size;
297}
298
// !!!!! Special hack to get all types of calls to specify the byte offset
//       from the start of the call to the point where the return address
//       will point.
302int MachCallStaticJavaNode::ret_addr_offset() {
303  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
304}
305
306int MachCallDynamicJavaNode::ret_addr_offset() {
307  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
308}
309
310static int sizeof_FFree_Float_Stack_All = -1;
311
312int MachCallRuntimeNode::ret_addr_offset() {
313  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
314  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
315}
316
317// Indicate if the safepoint node needs the polling page as an input.
318// Since x86 does have absolute addressing, it doesn't.
319bool SafePointNode::needs_polling_address_input() {
320  return false;
321}
322
323//
324// Compute padding required for nodes which need alignment
325//
326
327// The address of the call instruction needs to be 4-byte aligned to
328// ensure that it does not span a cache line so that it can be patched.
329int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
330  current_offset += pre_call_resets_size();  // skip fldcw, if any
331  current_offset += 1;      // skip call opcode byte
332  return align_up(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_resets_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return align_up(current_offset, alignment_required()) - current_offset;
342}
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
414
415   // rRegI ereg, memory mem) %{    // emit_reg_mem
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && (disp_reloc == relocInfo::none) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( disp_reloc != relocInfo::none ) {
435            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( disp_reloc != relocInfo::none ) {
443            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && (disp_reloc == relocInfo::none) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( disp_reloc != relocInfo::none ) {
473          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
532  ShouldNotReachHere();
533}
534
535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
536  // Empty encoding
537}
538
539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
540  return 0;
541}
542
543#ifndef PRODUCT
544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
545  st->print("# MachConstantBaseNode (empty encoding)");
546}
547#endif
548
549
550//=============================================================================
551#ifndef PRODUCT
552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553  Compile* C = ra_->C;
554
555  int framesize = C->frame_size_in_bytes();
556  int bangsize = C->bang_size_in_bytes();
557  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
558  // Remove wordSize for return addr which is already pushed.
559  framesize -= wordSize;
560
561  if (C->need_stack_bang(bangsize)) {
562    framesize -= wordSize;
563    st->print("# stack bang (%d bytes)", bangsize);
564    st->print("\n\t");
565    st->print("PUSH   EBP\t# Save EBP");
566    if (PreserveFramePointer) {
567      st->print("\n\t");
568      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
569    }
570    if (framesize) {
571      st->print("\n\t");
572      st->print("SUB    ESP, #%d\t# Create frame",framesize);
573    }
574  } else {
575    st->print("SUB    ESP, #%d\t# Create frame",framesize);
576    st->print("\n\t");
577    framesize -= wordSize;
578    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
579    if (PreserveFramePointer) {
580      st->print("\n\t");
581      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
582      if (framesize > 0) {
583        st->print("\n\t");
584        st->print("ADD    EBP, #%d", framesize);
585      }
586    }
587  }
588
589  if (VerifyStackAtCalls) {
590    st->print("\n\t");
591    framesize -= wordSize;
592    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
593  }
594
595  if( C->in_24_bit_fp_mode() ) {
596    st->print("\n\t");
597    st->print("FLDCW  \t# load 24 bit fpu control word");
598  }
599  if (UseSSE >= 2 && VerifyFPU) {
600    st->print("\n\t");
601    st->print("# verify FPU stack (must be clean on entry)");
602  }
603
604#ifdef ASSERT
605  if (VerifyStackAtCalls) {
606    st->print("\n\t");
607    st->print("# stack alignment check");
608  }
609#endif
610  st->cr();
611}
612#endif
613
614
615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
616  Compile* C = ra_->C;
617  MacroAssembler _masm(&cbuf);
618
619  int framesize = C->frame_size_in_bytes();
620  int bangsize = C->bang_size_in_bytes();
621
622  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
623
624  C->set_frame_complete(cbuf.insts_size());
625
626  if (C->has_mach_constant_base_node()) {
627    // NOTE: We set the table base offset here because users might be
628    // emitted before MachConstantBaseNode.
629    Compile::ConstantTable& constant_table = C->constant_table();
630    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
631  }
632}
633
634uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
635  return MachNode::size(ra_); // too many variables; just compute it the hard way
636}
637
638int MachPrologNode::reloc() const {
639  return 0; // a large enough number
640}
641
642//=============================================================================
643#ifndef PRODUCT
644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
645  Compile *C = ra_->C;
646  int framesize = C->frame_size_in_bytes();
647  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
648  // Remove two words for return addr and rbp,
649  framesize -= 2*wordSize;
650
651  if (C->max_vector_size() > 16) {
652    st->print("VZEROUPPER");
653    st->cr(); st->print("\t");
654  }
655  if (C->in_24_bit_fp_mode()) {
656    st->print("FLDCW  standard control word");
657    st->cr(); st->print("\t");
658  }
659  if (framesize) {
660    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
661    st->cr(); st->print("\t");
662  }
663  st->print_cr("POPL   EBP"); st->print("\t");
664  if (do_polling() && C->is_method_compilation()) {
665    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
666    st->cr(); st->print("\t");
667  }
668}
669#endif
670
671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
672  Compile *C = ra_->C;
673  MacroAssembler _masm(&cbuf);
674
675  if (C->max_vector_size() > 16) {
676    // Clear upper bits of YMM registers when current compiled code uses
677    // wide vectors to avoid AVX <-> SSE transition penalty during call.
678    _masm.vzeroupper();
679  }
680  // If method set FPU control word, restore to standard control word
681  if (C->in_24_bit_fp_mode()) {
682    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
683  }
684
685  int framesize = C->frame_size_in_bytes();
686  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
687  // Remove two words for return addr and rbp,
688  framesize -= 2*wordSize;
689
690  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
691
692  if (framesize >= 128) {
693    emit_opcode(cbuf, 0x81); // add  SP, #framesize
694    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695    emit_d32(cbuf, framesize);
696  } else if (framesize) {
697    emit_opcode(cbuf, 0x83); // add  SP, #framesize
698    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
699    emit_d8(cbuf, framesize);
700  }
701
702  emit_opcode(cbuf, 0x58 | EBP_enc);
703
704  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
705    __ reserved_stack_check();
706  }
707
708  if (do_polling() && C->is_method_compilation()) {
709    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
710    emit_opcode(cbuf,0x85);
711    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
712    emit_d32(cbuf, (intptr_t)os::get_polling_page());
713  }
714}
715
716uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
717  Compile *C = ra_->C;
718  // If method set FPU control word, restore to standard control word
719  int size = C->in_24_bit_fp_mode() ? 6 : 0;
720  if (C->max_vector_size() > 16) size += 3; // vzeroupper
721  if (do_polling() && C->is_method_compilation()) size += 6;
722
723  int framesize = C->frame_size_in_bytes();
724  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
725  // Remove two words for return addr and rbp,
726  framesize -= 2*wordSize;
727
728  size++; // popl rbp,
729
730  if (framesize >= 128) {
731    size += 6;
732  } else {
733    size += framesize ? 3 : 0;
734  }
735  size += 64; // added to support ReservedStackAccess
736  return size;
737}
738
739int MachEpilogNode::reloc() const {
740  return 0; // a large enough number
741}
742
743const Pipeline * MachEpilogNode::pipeline() const {
744  return MachNode::pipeline_class();
745}
746
747int MachEpilogNode::safepoint_offset() const { return 0; }
748
749//=============================================================================
750
751enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
752static enum RC rc_class( OptoReg::Name reg ) {
753
754  if( !OptoReg::is_valid(reg)  ) return rc_bad;
755  if (OptoReg::is_stack(reg)) return rc_stack;
756
757  VMReg r = OptoReg::as_VMReg(reg);
758  if (r->is_Register()) return rc_int;
759  if (r->is_FloatRegister()) {
760    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
761    return rc_float;
762  }
763  assert(r->is_XMMRegister(), "must be");
764  return rc_xmm;
765}
766
767static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
768                        int opcode, const char *op_str, int size, outputStream* st ) {
769  if( cbuf ) {
770    emit_opcode  (*cbuf, opcode );
771    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
772#ifndef PRODUCT
773  } else if( !do_size ) {
774    if( size != 0 ) st->print("\n\t");
775    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
776      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
777      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
778    } else { // FLD, FST, PUSH, POP
779      st->print("%s [ESP + #%d]",op_str,offset);
780    }
781#endif
782  }
783  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
784  return size+3+offset_size;
785}
786
787// Helper for XMM registers.  Extra opcode bits, limited syntax.
788static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
789                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
790  int in_size_in_bits = Assembler::EVEX_32bit;
791  int evex_encoding = 0;
792  if (reg_lo+1 == reg_hi) {
793    in_size_in_bits = Assembler::EVEX_64bit;
794    evex_encoding = Assembler::VEX_W;
795  }
796  if (cbuf) {
797    MacroAssembler _masm(cbuf);
798    // EVEX spills remain EVEX: Compressed displacemement is better than AVX on spill mem operations, 
799    //                          it maps more cases to single byte displacement
800    _masm.set_managed();
801    if (reg_lo+1 == reg_hi) { // double move?
802      if (is_load) {
803        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
804      } else {
805        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
806      }
807    } else {
808      if (is_load) {
809        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
810      } else {
811        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
812      }
813    }
814#ifndef PRODUCT
815  } else if (!do_size) {
816    if (size != 0) st->print("\n\t");
817    if (reg_lo+1 == reg_hi) { // double move?
818      if (is_load) st->print("%s %s,[ESP + #%d]",
819                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
820                              Matcher::regName[reg_lo], offset);
821      else         st->print("MOVSD  [ESP + #%d],%s",
822                              offset, Matcher::regName[reg_lo]);
823    } else {
824      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
825                              Matcher::regName[reg_lo], offset);
826      else         st->print("MOVSS  [ESP + #%d],%s",
827                              offset, Matcher::regName[reg_lo]);
828    }
829#endif
830  }
831  bool is_single_byte = false;
832  if ((UseAVX > 2) && (offset != 0)) {
833    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
834  }
835  int offset_size = 0;
836  if (UseAVX > 2 ) {
837    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
838  } else {
839    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
840  }
841  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
842  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
843  return size+5+offset_size;
844}
845
846
847static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
848                            int src_hi, int dst_hi, int size, outputStream* st ) {
849  if (cbuf) {
850    MacroAssembler _masm(cbuf);
851    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
852    _masm.set_managed();
853    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
854      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
855                as_XMMRegister(Matcher::_regEncode[src_lo]));
856    } else {
857      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
858                as_XMMRegister(Matcher::_regEncode[src_lo]));
859    }
860#ifndef PRODUCT
861  } else if (!do_size) {
862    if (size != 0) st->print("\n\t");
863    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
864      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
865        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
866      } else {
867        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
868      }
869    } else {
870      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
871        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
872      } else {
873        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
874      }
875    }
876#endif
877  }
878  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
879  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
880  int sz = (UseAVX > 2) ? 6 : 4;
881  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
882      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
883  return size + sz;
884}
885
886static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
887                            int src_hi, int dst_hi, int size, outputStream* st ) {
888  // 32-bit
889  if (cbuf) {
890    MacroAssembler _masm(cbuf);
891    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
892    _masm.set_managed();
893    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
894             as_Register(Matcher::_regEncode[src_lo]));
895#ifndef PRODUCT
896  } else if (!do_size) {
897    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
898#endif
899  }
900  return (UseAVX> 2) ? 6 : 4;
901}
902
903
904static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
905                                 int src_hi, int dst_hi, int size, outputStream* st ) {
906  // 32-bit
907  if (cbuf) {
908    MacroAssembler _masm(cbuf);
909    // EVEX spills remain EVEX: logic complex between full EVEX, partial and AVX, manage EVEX spill code one way.
910    _masm.set_managed();
911    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
912             as_XMMRegister(Matcher::_regEncode[src_lo]));
913#ifndef PRODUCT
914  } else if (!do_size) {
915    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
916#endif
917  }
918  return (UseAVX> 2) ? 6 : 4;
919}
920
921static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
922  if( cbuf ) {
923    emit_opcode(*cbuf, 0x8B );
924    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
925#ifndef PRODUCT
926  } else if( !do_size ) {
927    if( size != 0 ) st->print("\n\t");
928    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
929#endif
930  }
931  return size+2;
932}
933
934static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
935                                 int offset, int size, outputStream* st ) {
936  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
937    if( cbuf ) {
938      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
939      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
940#ifndef PRODUCT
941    } else if( !do_size ) {
942      if( size != 0 ) st->print("\n\t");
943      st->print("FLD    %s",Matcher::regName[src_lo]);
944#endif
945    }
946    size += 2;
947  }
948
949  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
950  const char *op_str;
951  int op;
952  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
953    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
954    op = 0xDD;
955  } else {                   // 32-bit store
956    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
957    op = 0xD9;
958    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
959  }
960
961  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
962}
963
// Forward declarations only: the next two helpers are shared by the 32- and
// 64-bit VM and are defined in x86.ad.  MachSpillCopyNode::implementation()
// uses vec_mov_helper for vector XMM-to-XMM copies and vec_spill_helper for
// vector copies between an XMM register and a stack slot (is_load selects the
// stack-to-register direction).
static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                          int src_hi, int dst_hi, uint ireg, outputStream* st);

static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
                            int stack_offset, int reg, uint ireg, outputStream* st);
970
971static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
972                                     int dst_offset, uint ireg, outputStream* st) {
973  int calc_size = 0;
974  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
975  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
976  switch (ireg) {
977  case Op_VecS:
978    calc_size = 3+src_offset_size + 3+dst_offset_size;
979    break;
980  case Op_VecD: {
981    calc_size = 3+src_offset_size + 3+dst_offset_size;
982    int tmp_src_offset = src_offset + 4;
983    int tmp_dst_offset = dst_offset + 4;
984    src_offset_size = (tmp_src_offset == 0) ? 0 : ((tmp_src_offset < 0x80) ? 1 : 4);
985    dst_offset_size = (tmp_dst_offset == 0) ? 0 : ((tmp_dst_offset < 0x80) ? 1 : 4);
986    calc_size += 3+src_offset_size + 3+dst_offset_size;
987    break;
988  }   
989  case Op_VecX:
990  case Op_VecY:
991  case Op_VecZ:
992    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
993    break;
994  default:
995    ShouldNotReachHere();
996  }
997  if (cbuf) {
998    MacroAssembler _masm(cbuf);
999    int offset = __ offset();
1000    switch (ireg) {
1001    case Op_VecS:
1002      __ pushl(Address(rsp, src_offset));
1003      __ popl (Address(rsp, dst_offset));
1004      break;
1005    case Op_VecD:
1006      __ pushl(Address(rsp, src_offset));
1007      __ popl (Address(rsp, dst_offset));
1008      __ pushl(Address(rsp, src_offset+4));
1009      __ popl (Address(rsp, dst_offset+4));
1010      break;
1011    case Op_VecX:
1012      __ movdqu(Address(rsp, -16), xmm0);
1013      __ movdqu(xmm0, Address(rsp, src_offset));
1014      __ movdqu(Address(rsp, dst_offset), xmm0);
1015      __ movdqu(xmm0, Address(rsp, -16));
1016      break;
1017    case Op_VecY:
1018      __ vmovdqu(Address(rsp, -32), xmm0);
1019      __ vmovdqu(xmm0, Address(rsp, src_offset));
1020      __ vmovdqu(Address(rsp, dst_offset), xmm0);
1021      __ vmovdqu(xmm0, Address(rsp, -32));
1022      break;
1023    case Op_VecZ:
1024      __ evmovdquq(Address(rsp, -64), xmm0, 2);
1025      __ evmovdquq(xmm0, Address(rsp, src_offset), 2);
1026      __ evmovdquq(Address(rsp, dst_offset), xmm0, 2);
1027      __ evmovdquq(xmm0, Address(rsp, -64), 2);
1028      break;
1029    default:
1030      ShouldNotReachHere();
1031    }
1032    int size = __ offset() - offset;
1033    assert(size == calc_size, "incorrect size calculation");
1034    return size;
1035#ifndef PRODUCT
1036  } else if (!do_size) {
1037    switch (ireg) {
1038    case Op_VecS:
1039      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1040                "popl    [rsp + #%d]",
1041                src_offset, dst_offset);
1042      break;
1043    case Op_VecD:
1044      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1045                "popq    [rsp + #%d]\n\t"
1046                "pushl   [rsp + #%d]\n\t"
1047                "popq    [rsp + #%d]",
1048                src_offset, dst_offset, src_offset+4, dst_offset+4);
1049      break;
1050     case Op_VecX:
1051      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1052                "movdqu  xmm0, [rsp + #%d]\n\t"
1053                "movdqu  [rsp + #%d], xmm0\n\t"
1054                "movdqu  xmm0, [rsp - #16]",
1055                src_offset, dst_offset);
1056      break;
1057    case Op_VecY:
1058      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1059                "vmovdqu xmm0, [rsp + #%d]\n\t"
1060                "vmovdqu [rsp + #%d], xmm0\n\t"
1061                "vmovdqu xmm0, [rsp - #32]",
1062                src_offset, dst_offset);
1063      break;
1064    case Op_VecZ:
1065      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1066                "vmovdqu xmm0, [rsp + #%d]\n\t"
1067                "vmovdqu [rsp + #%d], xmm0\n\t"
1068                "vmovdqu xmm0, [rsp - #64]",
1069                src_offset, dst_offset);
1070      break;
1071    default:
1072      ShouldNotReachHere();
1073    }
1074#endif
1075  }
1076  return calc_size;
1077}
1078
// Shared worker behind MachSpillCopyNode::format(), emit() and size().
//
// Copies the value of in(1) to the location assigned to this node.  Source and
// destination are each described by a (first, second) pair of OptoReg names
// obtained from the register allocator; every name is classified with
// rc_class() and the pair of classes selects the copy sequence.
//
//   cbuf    - when non-NULL the copy is emitted into this buffer
//   ra_     - register allocator, queried for register names and stack offsets
//   do_size - when cbuf is NULL: true = compute the size only, false = print
//             the instructions to 'st' (printing exists in non-PRODUCT builds only)
//   st      - output stream for the printed form; emit() and size() pass NULL
//
// Returns the size in bytes of the copy sequence; 0 for a self copy.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  // Register class (int / x87 float / xmm / stack / bad) of each half.
  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  // 'size' is the running byte count that the scalar helpers below extend.
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Vector values: only stack and XMM locations are expected; each combination
  // is delegated to a vector helper whose result is returned directly.
  if (bottom_type()->isa_vect() != NULL) {
    uint ireg = ideal_reg();
    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
      // mem -> mem
      int src_offset = ra_->reg2offset(src_first);
      int dst_offset = ra_->reg2offset(dst_first);
      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
      // xmm -> xmm
      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
      // xmm -> mem (store)
      int stack_offset = ra_->reg2offset(dst_first);
      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
      // mem -> xmm (load)
      int stack_offset = ra_->reg2offset(src_first);
      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
    } else {
      ShouldNotReachHere();
    }
  }

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  // impl_helper places the encoding of the register argument in the ModRM reg
  // field, so ESI_num / EAX_num here supply the opcode extension for
  // 0xFF (PUSH) and 0x8F (POP); no general register is actually used.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    if( src_second == dst_first ) { // overlapping stack copy ranges
      // The first-half copy below writes dst_first, which is the slot still
      // holding the source's second half, so the second half is moved first.
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // The three integer cases below do not return: they only move the first
  // half and fall through to the "second bits" handling at the end.

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // Check for integer reg-xmm reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit integer-float reg moves" );
    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }
  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        // Source is not FPR1L: push a copy with FLD, then store-and-pop it
        // into the destination.
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        // Source is FPR1L: a non-popping store into the destination suffices.
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // Two bytes per x87 register-form instruction.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      // Load from the stack slot, then store-and-pop into the destination register.
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // 3 bytes + displacement for the load, 2 bytes for the FSTP.
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm reg-integer reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
            "no 64 bit float-integer reg moves" );
    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg
  // Done through memory: reserve 8 bytes below ESP, store the x87 value there,
  // load it into the XMM register, then release the 8 bytes again.
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    // Store the x87 value to the temp memory at [ESP + 0].
    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // Only the integer cases reach this point; one of them must have added to 'size'.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // No other combination of second-half locations is handled.
  Unimplemented();
  return 0; // Mute compiler
}
1309
1310#ifndef PRODUCT
1311void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1312  implementation( NULL, ra_, false, st );
1313}
1314#endif
1315
1316void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1317  implementation( &cbuf, ra_, false, NULL );
1318}
1319
1320uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1321  return implementation( NULL, ra_, true, NULL );
1322}
1323
1324
1325//=============================================================================
1326#ifndef PRODUCT
1327void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1328  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1329  int reg = ra_->get_reg_first(this);
1330  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1331}
1332#endif
1333
1334void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1335  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1336  int reg = ra_->get_encode(this);
1337  if( offset >= 128 ) {
1338    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1339    emit_rm(cbuf, 0x2, reg, 0x04);
1340    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1341    emit_d32(cbuf, offset);
1342  }
1343  else {
1344    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1345    emit_rm(cbuf, 0x1, reg, 0x04);
1346    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1347    emit_d8(cbuf, offset);
1348  }
1349}
1350
1351uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1352  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1353  if( offset >= 128 ) {
1354    return 7;
1355  }
1356  else {
1357    return 4;
1358  }
1359}
1360
1361//=============================================================================
1362#ifndef PRODUCT
1363void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1364  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1365  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1366  st->print_cr("\tNOP");
1367  st->print_cr("\tNOP");
1368  if( !OptoBreakpoint )
1369    st->print_cr("\tNOP");
1370}
1371#endif
1372
1373void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1374  MacroAssembler masm(&cbuf);
1375#ifdef ASSERT
1376  uint insts_size = cbuf.insts_size();
1377#endif
1378  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1379  masm.jump_cc(Assembler::notEqual,
1380               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1381  /* WARNING these NOPs are critical so that verified entry point is properly
1382     aligned for patching by NativeJump::patch_verified_entry() */
1383  int nops_cnt = 2;
1384  if( !OptoBreakpoint ) // Leave space for int3
1385     nops_cnt += 1;
1386  masm.nop(nops_cnt);
1387
1388  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1389}
1390
1391uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1392  return OptoBreakpoint ? 11 : 12;
1393}
1394
1395
1396//=============================================================================
1397
1398int Matcher::regnum_to_fpu_offset(int regnum) {
1399  return regnum - 32; // The FP registers are in the second chunk
1400}
1401
1402// This is UltraSparc specific, true just means we have fast l2f conversion
1403const bool Matcher::convL2FSupported(void) {
1404  return true;
1405}
1406
1407// Is this branch offset short enough that a short branch can be used?
1408//
1409// NOTE: If the platform does not provide any short branch variants, then
1410//       this method should return false for offset 0.
1411bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1412  // The passed offset is relative to address of the branch.
1413  // On 86 a branch displacement is calculated relative to address
1414  // of a next instruction.
1415  offset -= br_size;
1416
1417  // the short version of jmpConUCF2 contains multiple branches,
1418  // making the reach slightly less
1419  if (rule == jmpConUCF2_rule)
1420    return (-126 <= offset && offset <= 125);
1421  return (-128 <= offset && offset <= 127);
1422}
1423
1424const bool Matcher::isSimpleConstant64(jlong value) {
1425  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1426  return false;
1427}
1428
// ClearArray is implemented with rep stos, whose ECX count parameter is in
// dwords, so the array-initialization count is not expressed in bytes.
const bool Matcher::init_array_count_is_in_bytes = false;
1431
1432// Needs 2 CMOV's for longs.
1433const int Matcher::long_cmove_cost() { return 1; }
1434
1435// No CMOVF/CMOVD with SSE/SSE2
1436const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1437
// Does the CPU require late expand (see block.cpp for a description of late
// expand)?  Not on this port.
const bool Matcher::require_postalloc_expand = false;

// Do we need to mask the count passed to shift instructions, or does the CPU
// only look at the lower 5/6 bits anyway?  No masking is requested here.
const bool Matcher::need_masked_shift_count = false;
1444
1445bool Matcher::narrow_oop_use_complex_address() {
1446  ShouldNotCallThis();
1447  return true;
1448}
1449
1450bool Matcher::narrow_klass_use_complex_address() {
1451  ShouldNotCallThis();
1452  return true;
1453}
1454
1455bool Matcher::const_oop_prefer_decode() {
1456  ShouldNotCallThis();
1457  return true;
1458}
1459
1460bool Matcher::const_klass_prefer_decode() {
1461  ShouldNotCallThis();
1462  return true;
1463}
1464
// Is it better to copy float constants, or to load them directly from memory?
// Intel can load a float constant from a direct address, requiring no extra
// registers.  Most RISCs have to materialize an address into a register
// first, so they would do better to copy the constant from the stack.
const bool Matcher::rematerialize_float_constants = true;

// If the CPU can load and store misaligned doubles directly then no fixup is
// needed.  Otherwise the double is split into 2 integer pieces and moved
// piece by piece.  This only happens when passing doubles into C code, as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1476
1477
1478void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1479  // Get the memory operand from the node
1480  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1481  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1482  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1483  uint opcnt     = 1;                 // First operand
1484  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1485  while( idx >= skipped+num_edges ) {
1486    skipped += num_edges;
1487    opcnt++;                          // Bump operand count
1488    assert( opcnt < numopnds, "Accessing non-existent operand" );
1489    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1490  }
1491
1492  MachOper *memory = node->_opnds[opcnt];
1493  MachOper *new_memory = NULL;
1494  switch (memory->opcode()) {
1495  case DIRECT:
1496  case INDOFFSET32X:
1497    // No transformation necessary.
1498    return;
1499  case INDIRECT:
1500    new_memory = new indirect_win95_safeOper( );
1501    break;
1502  case INDOFFSET8:
1503    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1504    break;
1505  case INDOFFSET32:
1506    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1507    break;
1508  case INDINDEXOFFSET:
1509    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1510    break;
1511  case INDINDEXSCALE:
1512    new_memory = new indIndexScale_win95_safeOper(memory->scale());
1513    break;
1514  case INDINDEXSCALEOFFSET:
1515    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1516    break;
1517  case LOAD_LONG_INDIRECT:
1518  case LOAD_LONG_INDOFFSET32:
1519    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1520    return;
1521  default:
1522    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1523    return;
1524  }
1525  node->_opnds[opcnt] = new_memory;
1526}
1527
// Advertise here whether the CPU requires explicit rounding operations to
// implement the UseStrictFP mode.  This port does.
const bool Matcher::strict_fp_requires_explicit_rounding = true;
1531
1532// Are floats conerted to double when stored to stack during deoptimization?
1533// On x32 it is stored with convertion only when FPU is used for floats.
1534bool Matcher::float_in_double() { return (UseSSE == 0); }
1535
// Do ints take an entire long register or just half?  Just half on this port.
const bool Matcher::int_in_long = false;
1538
1539// Return whether or not this register is ever used as an argument.  This
1540// function is used on startup to build the trampoline stubs in generateOptoStub.
1541// Registers not mentioned will be killed by the VM call in the trampoline, and
1542// arguments in those registers will not be available to the callee.
    //
    // ECX and EDX always qualify.  XMM0/XMM1 qualify only when UseSSE >= 1, and
    // the XMM0b/XMM1b slots only when UseSSE >= 2.  Everything else is rejected.
1543bool Matcher::can_be_java_arg( int reg ) {
1544  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1545  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1546  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1547  return false;
1548}
1549
1550bool Matcher::is_spillable_arg( int reg ) {
      // On this port the answer is simply delegated to can_be_java_arg().
1551  return can_be_java_arg(reg);
1552}
1553
1554bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1555  // Use the hardware integer DIV instruction when
1556  // it is faster than code which uses multiply.
1557  // Only when the constant divisor fits into 32 bits
1558  // (min_jint is excluded to get only correct
1559  // positive 32 bit values from negative).
      // Returns true only if VM_Version::has_fast_idiv() holds and the divisor
      // survives a round trip through int and is not min_jint.
1560  return VM_Version::has_fast_idiv() &&
1561         (divisor == (int)divisor && divisor != min_jint);
1562}
1563
1564// Register for DIVI projection of divmodI: returns the EAX register mask.
1565RegMask Matcher::divI_proj_mask() {
1566  return EAX_REG_mask();
1567}
1568
1569// Register for MODI projection of divmodI: returns the EDX register mask.
1570RegMask Matcher::modI_proj_mask() {
1571  return EDX_REG_mask();
1572}
1573
1574// Register for DIVL projection of divmodL.
    // Not expected to be called on this port: ShouldNotReachHere() is invoked
    // first, and the default-constructed RegMask only satisfies the return type.
1575RegMask Matcher::divL_proj_mask() {
1576  ShouldNotReachHere();
1577  return RegMask();
1578}
1579
1580// Register for MODL projection of divmodL.
    // Not expected to be called on this port: ShouldNotReachHere() is invoked
    // first, and the default-constructed RegMask only satisfies the return type.
1581RegMask Matcher::modL_proj_mask() {
1582  ShouldNotReachHere();
1583  return RegMask();
1584}
1585
1586const RegMask Matcher::method_handle_invoke_SP_save_mask() {
      // This port returns the NO_REG mask here.
1587  return NO_REG_mask();
1588}
1589
1590// Returns true if the high 32 bits of the value are known to be zero.
    // Only two node shapes are recognized:
    //   - an AndL whose second input is a constant with a zero upper 32 bits;
    //   - a ConL whose value has a zero upper 32 bits.
    // Every other node yields false.
1591bool is_operand_hi32_zero(Node* n) {
1592  int opc = n->Opcode();
1593  if (opc == Op_AndL) {
        // Masking with a constant whose upper half is zero clears the upper half.
1594    Node* o2 = n->in(2);
1595    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1596      return true;
1597    }
1598  }
1599  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1600    return true;
1601  }
1602  return false;
1603}
1604
1605%}
1606
1607//----------ENCODING BLOCK-----------------------------------------------------
1608// This block specifies the encoding classes used by the compiler to output
1609// byte streams.  Encoding classes generate functions which are called by
1610// Machine Instruction Nodes in order to generate the bit encoding of the
1611// instruction.  Operands specify their base encoding interface with the
1612// interface keyword.  Four interfaces are currently supported:
1613// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1614// operand to generate a function which returns its register number when
1615// queried.   CONST_INTER causes an operand to generate a function which
1616// returns the value of the constant when queried.  MEMORY_INTER causes an
1617// operand to generate four functions which return the Base Register, the
1618// Index Register, the Scale Value, and the Offset Value of the operand when
1619// queried.  COND_INTER causes an operand to generate six functions which
1620// return the encoding code (i.e. the encoding bits for the instruction)
1621// associated with each basic boolean condition for a conditional instruction.
1622// Instructions specify two basic values for encoding.  They use the
1623// ins_encode keyword to specify their encoding class (which must be one of
1624// the class names specified in the encoding block), and they use the
1625// opcode keyword to specify, in order, their primary, secondary, and
1626// tertiary opcode.  Only the opcode sections which a particular instruction
1627// needs for encoding need to be specified.
1628encode %{
1629  // Build emit functions for each basic byte or larger field in the intel
1630  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1631  // code in the enc_class source block.  Emit functions will live in the
1632  // main source block for now.  In future, we can generalize this by
1633  // adding a syntax that specifies the sizes of fields in an order,
1634  // so that the adlc can build the emit functions automagically
1635
1636  // Emit primary opcode ($primary of the using instruction) via emit_opcode.
1637  enc_class OpcP %{
1638    emit_opcode(cbuf, $primary);
1639  %}
1640
1641  // Emit secondary opcode ($secondary of the using instruction) via emit_opcode.
1642  enc_class OpcS %{
1643    emit_opcode(cbuf, $secondary);
1644  %}
1645
1646  // Emit opcode directly: the opcode is taken from the constant operand d8
      // rather than from the instruction's opcode() declaration.
1647  enc_class Opcode(immI d8) %{
1648    emit_opcode(cbuf, $d8$$constant);
1649  %}
1650
1651  enc_class SizePrefix %{
        // 0x66 is the x86 operand-size override prefix.
1652    emit_opcode(cbuf,0x66);
1653  %}
1654
1655  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
        // Emits only the r/m byte: mod=0x3 with dst and src register encodings.
        // The opcode must be emitted separately by the using instruction.
        // NOTE(review): an enc_class with this same name and body is defined
        // again further down in this encode block.
1656    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1657  %}
1658
1659  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
        // Opcode taken from the constant operand, followed by a mod=0x3 r/m byte
        // built from the dst and src register encodings.
1660    emit_opcode(cbuf,$opcode$$constant);
1661    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1662  %}
1663
1664  enc_class mov_r32_imm0( rRegI dst ) %{
        // Loads zero into dst with a full 32-bit immediate move (opcode + 4 bytes).
1665    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1666    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1667  %}
1668
1669  enc_class cdq_enc %{
1670    // Full implementation of Java idiv and irem; checks for
1671    // special case as described in JVM spec., p.243 & p.271.
1672    //
1673    //         normal case                           special case
1674    //
1675    // input : eax: dividend                         min_int
1676    //         reg: divisor                          -1
1677    //
1678    // output: eax: quotient  (= eax idiv reg)       min_int
1679    //         edx: remainder (= eax irem reg)       0
1680    //
1681    //  Code sequence:
1682    //
1683    //  81 F8 00 00 00 80    cmp         eax,80000000h
1684    //  0F 85 0B 00 00 00    jne         normal_case
1685    //  33 D2                xor         edx,edx
1686    //  83 F9 FF             cmp         ecx,-1       (imm8 0FFh, sign-extended)
1687    //  0F 84 03 00 00 00    je          done
1688    //                  normal_case:
1689    //  99                   cdq
1690    //  F7 F9                idiv        ecx
1691    //                  done:
1692    //
        // The bytes below hard-code ECX as the divisor register (83 F9 ..), and
        // the "je done" displacement of 3 skips the 1-byte cdq plus a 2-byte
        // idiv that the user of this rule has to emit right after this encoding.
1693    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1694    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1695    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp eax,80000000h
1696    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1697    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1698    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1699    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor edx,edx
1700    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp ecx,-1
1701    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1702    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1703    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1704    // normal_case:
1705    emit_opcode(cbuf,0x99);                                         // cdq
1706    // idiv (note: must be emitted by the user of this rule)
1707    // done:
1708  %}
1709
1710  // Dense encoding for older common ops: the register encoding is added to
      // the opcode constant and the sum is emitted as a single opcode.
1711  enc_class Opc_plus(immI opcode, rRegI reg) %{
1712    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1713  %}
1714
1715
1716  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
      // Only the opcode is emitted here; the immediate itself is not.
1717  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1718    // Immediate fits in a signed byte: set the sign-extend bit (0x02) in the opcode
1719    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1720      emit_opcode(cbuf, $primary | 0x02);
1721    }
1722    else {                          // If 32-bit immediate
1723      emit_opcode(cbuf, $primary);
1724    }
1725  %}
1726
1727  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1728    // Emit primary opcode and set sign-extend bit
1729    // when the immediate fits in a signed byte (-128..127).
        // The immediate itself is not emitted by this encoding.
1730    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1731      emit_opcode(cbuf, $primary | 0x02);    }
1732    else {                          // If 32-bit immediate
1733      emit_opcode(cbuf, $primary);
1734    }
1735    // Emit r/m byte with secondary opcode, after primary opcode.
1736    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1737  %}
1738
1739  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1740    // Emit the immediate itself: as 8 bits when it fits in a signed byte,
        // otherwise as 32 bits.  No opcode byte is emitted here; the range test
        // is the same one OpcSE/OpcSErm use to pick the opcode form.
1741    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1742      $$$emit8$imm$$constant;
1743    }
1744    else {                          // If 32-bit immediate
1745      // Output immediate
1746      $$$emit32$imm$$constant;
1747    }
1748  %}
1749
1750  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
        // Low-half step of a long op-with-immediate: opcode, r/m byte for the
        // low register of dst, then the low 32 bits of the immediate.
1751    // Emit primary opcode and set sign-extend bit
1752    // when the low word fits in a signed byte (-128..127).
1753    int con = (int)$imm$$constant; // Throw away top bits
1754    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1755    // Emit r/m byte with secondary opcode, after primary opcode.
1756    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
        // Immediate width matches the opcode form chosen above.
1757    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1758    else                               emit_d32(cbuf,con);
1759  %}
1760
1761  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
        // High-half step of a long op-with-immediate: opcode, r/m byte for the
        // high register of dst, then the high 32 bits of the immediate.
1762    // Emit primary opcode and set sign-extend bit
1763    // when the high word fits in a signed byte (-128..127).
1764    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1765    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1766    // Emit r/m byte with tertiary opcode, after primary opcode.
1767    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
        // Immediate width matches the opcode form chosen above.
1768    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1769    else                               emit_d32(cbuf,con);
1770  %}
1771
1772  enc_class OpcSReg (rRegI dst) %{    // BSWAP
        // Passes $secondary and the dst register encoding to emit_cc.
        // Presumably emit_cc merges them into one byte, as in the 0xC8 + reg use
        // in bswap_long_bytes - TODO confirm against emit_cc's definition.
1773    emit_cc(cbuf, $secondary, $dst$$reg );
1774  %}
1775
1776  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
        // Byte-reverses a long held in a register pair: swap the bytes inside
        // each 32-bit half, then exchange the two halves.
1777    int destlo = $dst$$reg;
1778    int desthi = HIGH_FROM_LOW(destlo);
1779    // bswap lo
1780    emit_opcode(cbuf, 0x0F);
1781    emit_cc(cbuf, 0xC8, destlo);
1782    // bswap hi
1783    emit_opcode(cbuf, 0x0F);
1784    emit_cc(cbuf, 0xC8, desthi);
1785    // xchg lo and hi
1786    emit_opcode(cbuf, 0x87);
1787    emit_rm(cbuf, 0x3, destlo, desthi);
1788  %}
1789
1790  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
        // Emits only the r/m byte: mod=0x3, $secondary in the reg/opcode field
        // and the operand register in the r/m field.
1791    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1792  %}
1793
1794  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
        // Primary opcode byte, then $secondary together with the condition code
        // of cop (via emit_cc).  The r/m byte is not emitted here.
1795    $$$emit8$primary;
1796    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1797  %}
1798
1799  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
        // Builds a two-byte opcode from base 0xDA00, the condition code of cop
        // and the src register encoding minus one, then emits the high byte
        // followed by the low byte.
1800    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1801    emit_d8(cbuf, op >> 8 );
1802    emit_d8(cbuf, op & 255);
1803  %}
1804
1805  // Emulate a CMOV with a conditional branch around a MOV.  Only the branch
      // is emitted here; brOffs is the 8-bit displacement that skips the MOV.
1806  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1807    // Invert sense of branch from sense of CMOV
        // (flipping the low bit of the condition code selects the opposite test)
1808    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1809    emit_d8( cbuf, $brOffs$$constant );
1810  %}
1811
1812  enc_class enc_PartialSubtypeCheck( ) %{
        // Emits the slow-path subtype check with fixed registers:
        // ESI = sub class, EAX = super class, ECX and EDI as extra registers.
1813    Register Redi = as_Register(EDI_enc); // result register
1814    Register Reax = as_Register(EAX_enc); // super class
1815    Register Recx = as_Register(ECX_enc); // killed
1816    Register Resi = as_Register(ESI_enc); // sub class
1817    Label miss;
1818
1819    MacroAssembler _masm(&cbuf);
        // No success label is supplied (NULL); a failed check branches to 'miss'.
        // NOTE(review): presumably a successful check falls through - confirm
        // against check_klass_subtype_slow_path.
1820    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1821                                     NULL, &miss,
1822                                     /*set_cond_codes:*/ true);
        // When the using instruction has a non-zero primary opcode, EDI is
        // cleared on the fall-through path only; the miss path skips the clear.
1823    if ($primary) {
1824      __ xorptr(Redi, Redi);
1825    }
1826    __ bind(miss);
1827  %}
1828
1829  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
        // With UseSSE >= 2 nothing is emitted unless VerifyFPU asks for a check
        // that the FPU stack is empty; otherwise the FPU stack is emptied.
1830    MacroAssembler masm(&cbuf);
1831    int start = masm.offset();
1832    if (UseSSE >= 2) {
1833      if (VerifyFPU) {
1834        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1835      }
1836    } else {
1837      // External c_calling_convention expects the FPU stack to be 'clean'.
1838      // Compiled code leaves it dirty.  Do cleanup now.
1839      masm.empty_FPU_stack();
1840    }
        // The first emission records the code size in sizeof_FFree_Float_Stack_All
        // (-1 means "not measured yet"); later emissions must match that size.
1841    if (sizeof_FFree_Float_Stack_All == -1) {
1842      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1843    } else {
1844      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1845    }
1846  %}
1847
1848  enc_class Verify_FPU_For_Leaf %{
        // Emits nothing unless VerifyFPU is set; then emits an FPU verification
        // tagged as following a runtime leaf call.
1849    if( VerifyFPU ) {
1850      MacroAssembler masm(&cbuf);
1851      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1852    }
1853  %}
1854
1855  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1856    // This is the instruction starting address for relocation info.
1857    cbuf.set_insts_mark();
1858    $$$emit8$primary;
1859    // CALL directly to the runtime
        // The 32-bit displacement is relative to the end of the 4-byte field,
        // hence the "- 4" after subtracting the current end of instructions.
1860    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1861                runtime_call_Relocation::spec(), RELOC_IMM32 );
1862
        // With UseSSE >= 2 a float/double result is taken off the FPU stack
        // right after the call: dropped if unused, else moved to xmm0 via a
        // temporary stack slot.
1863    if (UseSSE >= 2) {
1864      MacroAssembler _masm(&cbuf);
1865      BasicType rt = tf()->return_type();
1866
1867      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1868        // A C runtime call where the return value is unused.  In SSE2+
1869        // mode the result needs to be removed from the FPU stack.  It's
1870        // likely that this function call could be removed by the
1871        // optimizer if the C function is a pure function.
1872        __ ffree(0);
1873      } else if (rt == T_FLOAT) {
            // Make room for 4 bytes, store-and-pop as float, reload into xmm0,
            // then release the slot.  LEA is used to adjust rsp.
1874        __ lea(rsp, Address(rsp, -4));
1875        __ fstp_s(Address(rsp, 0));
1876        __ movflt(xmm0, Address(rsp, 0));
1877        __ lea(rsp, Address(rsp,  4));
1878      } else if (rt == T_DOUBLE) {
            // Same as above with an 8-byte slot and double-width moves.
1879        __ lea(rsp, Address(rsp, -8));
1880        __ fstp_d(Address(rsp, 0));
1881        __ movdbl(xmm0, Address(rsp, 0));
1882        __ lea(rsp, Address(rsp,  8));
1883      }
1884    }
1885  %}
1886
1887  enc_class pre_call_resets %{
1888    // If method sets FPU control word restore it here
        // (off0/off1 exist only in debug builds and bracket the emitted code)
1889    debug_only(int off0 = cbuf.insts_size());
1890    if (ra_->C->in_24_bit_fp_mode()) {
1891      MacroAssembler _masm(&cbuf);
1892      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1893    }
1894    // Clear upper bits of YMM registers when current compiled code uses
1895    // wide vectors to avoid AVX <-> SSE transition penalty during call.
        // NOTE(review): vzeroupper() is called unconditionally here; any check
        // for wide-vector use would have to live inside the assembler routine.
1896    MacroAssembler _masm(&cbuf);
1897    __ vzeroupper();
1898    debug_only(int off1 = cbuf.insts_size());
        // The emitted size has to agree with pre_call_resets_size().
1899    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1900  %}
1901
1902  enc_class post_call_FPU %{
1903    // If method sets FPU control word do it here also
        // (loads the 24-bit control word; nothing is emitted in other modes)
1904    if (Compile::current()->in_24_bit_fp_mode()) {
1905      MacroAssembler masm(&cbuf);
1906      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1907    }
1908  %}
1909
1910  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1911    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1912    // who we intended to call.
1913    cbuf.set_insts_mark();
1914    $$$emit8$primary;
1915
        // Without a resolved _method the call gets a runtime-call relocation;
        // otherwise it gets a static or optimized-virtual call relocation and a
        // to-interpreter stub.
1916    if (!_method) {
1917      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1918                     runtime_call_Relocation::spec(),
1919                     RELOC_IMM32);
1920    } else {
1921      int method_index = resolved_method_index(cbuf);
1922      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1923                                                  : static_call_Relocation::spec(method_index);
1924      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1925                     rspec, RELOC_DISP32);
1926      // Emit stubs for static call.
1927      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
          // A NULL stub is reported as a compilation failure and emission stops.
1928      if (stub == NULL) {
1929        ciEnv::current()->record_failure("CodeCache is full");
1930        return;
1931      }
1932    }
1933  %}
1934
1935  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
        // Delegates to MacroAssembler::ic_call with the call target and the
        // resolved method index.
1936    MacroAssembler _masm(&cbuf);
1937    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1938  %}
1939
1940  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
        // Indirect call through the from_compiled field of the Method in EAX.
1941    int disp = in_bytes(Method::from_compiled_offset());
        // The encoding below uses an 8-bit displacement, so the offset must fit.
1942    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1943
1944    // CALL *[EAX+in_bytes(Method::from_compiled_offset())]
1945    cbuf.set_insts_mark();
1946    $$$emit8$primary;
1947    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1948    emit_d8(cbuf, disp);             // Displacement
1949
1950  %}
1951
1952//   Following encoding is no longer used, but may be restored if calling
1953//   convention changes significantly.
1954//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1955//
1956//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1957//     // int ic_reg     = Matcher::inline_cache_reg();
1958//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1959//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1960//     // int imo_encode = Matcher::_regEncode[imo_reg];
1961//
1962//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1963//     // // so we load it immediately before the call
1964//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1965//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1966//
1967//     // xor rbp,ebp
1968//     emit_opcode(cbuf, 0x33);
1969//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1970//
1971//     // CALL to interpreter.
1972//     cbuf.set_insts_mark();
1973//     $$$emit8$primary;
1974//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1975//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1976//   %}
1977
1978  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
        // Primary opcode, r/m byte (mod=0x3, $secondary selects the operation,
        // dst in r/m), then the 8-bit shift count.
1979    $$$emit8$primary;
1980    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1981    $$$emit8$shift$$constant;
1982  %}
1983
1984  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1985    // Load immediate does not have a zero or sign extended version
1986    // for 8-bit immediates, so the full 32-bit constant is always emitted.
1987    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1988    $$$emit32$src$$constant;
1989  %}
1990
1991  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1992    // Load immediate does not have a zero or sign extended version
1993    // for 8-bit immediates, so the full 32-bit constant is always emitted.
        // Unlike LdImmI, the opcode base comes from $primary.
1994    emit_opcode(cbuf, $primary + $dst$$reg);
1995    $$$emit32$src$$constant;
1996  %}
1997
1998  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1999    // Load immediate does not have a zero or sign extended version
2000    // for 8-bit immediates
        // Loads the low 32 bits of the long constant into the low register of dst.
2001    int dst_enc = $dst$$reg;
2002    int src_con = $src$$constant & 0x0FFFFFFFFL;
2003    if (src_con == 0) {
2004      // xor dst, dst
          // (shorter than a move of zero)
2005      emit_opcode(cbuf, 0x33);
2006      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2007    } else {
2008      emit_opcode(cbuf, $primary + dst_enc);
2009      emit_d32(cbuf, src_con);
2010    }
2011  %}
2012
2013  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2014    // Load immediate does not have a zero or sign extended version
2015    // for 8-bit immediates
        // Loads the high 32 bits of the long constant into the high register of dst.
        // NOTE(review): the high register is computed as "+ 2" here, whereas
        // neighbouring encodings use HIGH_FROM_LOW - presumably equivalent; confirm.
2016    int dst_enc = $dst$$reg + 2;
        // Unsigned shift so that no sign bits leak into the extracted word.
2017    int src_con = ((julong)($src$$constant)) >> 32;
2018    if (src_con == 0) {
2019      // xor dst, dst
2020      emit_opcode(cbuf, 0x33);
2021      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2022    } else {
2023      emit_opcode(cbuf, $primary + dst_enc);
2024      emit_d32(cbuf, src_con);
2025    }
2026  %}
2027
2028
2029  // Encode a reg-reg copy.  If it is useless, then empty encoding.
      // The work is delegated to encode_Copy with the two register encodings.
2030  enc_class enc_Copy( rRegI dst, rRegI src ) %{
2031    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2032  %}
2033
2034  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
        // Same as enc_Copy, but the source is the (low) register of a long pair.
2035    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2036  %}
2037
2038  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
        // Emits only the r/m byte: mod=0x3 with dst and src register encodings.
        // NOTE(review): an enc_class with this same name and body is already
        // defined earlier in this encode block.
2039    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2040  %}
2041
2042  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
        // Primary opcode applied to the low registers of the two long pairs.
2043    $$$emit8$primary;
2044    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2045  %}
2046
2047  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
        // Secondary opcode applied to the high registers of the two long pairs.
2048    $$$emit8$secondary;
2049    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2050  %}
2051
2052  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
        // r/m byte only, for the low registers of the two long pairs;
        // no opcode is emitted here.
2053    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2054  %}
2055
2056  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
        // r/m byte only, for the high registers of the two long pairs;
        // no opcode is emitted here.
2057    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2058  %}
2059
2060  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
        // r/m byte only, pairing the int register dst with the high register of
        // the long src.  Note the parameter order: src is declared first.
2061    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2062  %}
2063
2064  enc_class Con32 (immI src) %{    // Con32(storeImmI)
2065    // Output the constant as a 32-bit immediate
2066    $$$emit32$src$$constant;
2067  %}
2068
2069  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2070    // Output Float immediate bits: the float constant is reinterpreted as a
        // 32-bit integer (jint_cast) and emitted as a 32-bit immediate.
2071    jfloat jf = $src$$constant;
2072    int    jf_as_bits = jint_cast( jf );
2073    emit_d32(cbuf, jf_as_bits);
2074  %}
2075
2076  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2077    // Output Float immediate bits: same as Con32FPR_as_bits, but for an
        // immF operand instead of immFPR.
2078    jfloat jf = $src$$constant;
2079    int    jf_as_bits = jint_cast( jf );
2080    emit_d32(cbuf, jf_as_bits);
2081  %}
2082
2083  enc_class Con16 (immI src) %{    // Con16(storeImmI)
2084    // Output the constant as a 16-bit immediate
2085    $$$emit16$src$$constant;
2086  %}
2087
2088  enc_class Con_d32(immI src) %{
        // Emits the constant as 32 bits through emit_d32 directly.
2089    emit_d32(cbuf,$src$$constant);
2090  %}
2091
2092  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2093    // Output immediate memory reference: an r/m byte with mod=0x00 and
        // r/m=0x05 (the 32-bit-displacement-only form) followed by a zero
        // 32-bit displacement; t1 goes in the reg field.
2094    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2095    emit_d32(cbuf, 0x00);
2096  %}
2097
2098  enc_class lock_prefix( ) %{
        // The LOCK prefix is emitted only when os::is_MP() reports true;
        // otherwise this encoding emits nothing.
2099    if( os::is_MP() )
2100      emit_opcode(cbuf,0xF0);         // [Lock]
2101  %}
2102
2103  // Cmp-xchg long value.
2104  // Note: we need to swap EBX and ECX before and after the
2105  //       cmpxchg8 instruction because the instruction uses
2106  //       ECX as the high order word of the new value to store but
2107  //       our register encoding uses EBX.
2108  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2109
2110    // XCHG  ebx,ecx
2111    emit_opcode(cbuf,0x87);
2112    emit_opcode(cbuf,0xD9);
2113    // [Lock]
        // (only when os::is_MP() reports true)
2114    if( os::is_MP() )
2115      emit_opcode(cbuf,0xF0);
2116    // CMPXCHG8 [Eptr]
        // (0F C7 with 1 in the reg/opcode field; mod=0x0 addresses [mem_ptr])
2117    emit_opcode(cbuf,0x0F);
2118    emit_opcode(cbuf,0xC7);
2119    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2120    // XCHG  ebx,ecx
        // (restores the register contents swapped above)
2121    emit_opcode(cbuf,0x87);
2122    emit_opcode(cbuf,0xD9);
2123  %}
2124
2125  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
        // 32-bit compare-and-exchange on [mem_ptr].  The reg field is fixed to 1,
        // i.e. the new value is taken from ECX.
2126    // [Lock]
        // (only when os::is_MP() reports true)
2127    if( os::is_MP() )
2128      emit_opcode(cbuf,0xF0);
2129
2130    // CMPXCHG [Eptr]
2131    emit_opcode(cbuf,0x0F);
2132    emit_opcode(cbuf,0xB1);
2133    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2134  %}
2135
2136  enc_class enc_cmpxchgb(eSIRegP mem_ptr) %{
        // Byte-sized compare-and-exchange on [mem_ptr] (opcode 0F B0 instead of
        // 0F B1); the reg field is fixed to 1.
2137    // [Lock]
        // (only when os::is_MP() reports true)
2138    if( os::is_MP() )
2139      emit_opcode(cbuf,0xF0);
2140
2141    // CMPXCHGB [Eptr]
2142    emit_opcode(cbuf,0x0F);
2143    emit_opcode(cbuf,0xB0);
2144    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2145  %}
2146
2147  enc_class enc_cmpxchgw(eSIRegP mem_ptr) %{
        // 16-bit compare-and-exchange on [mem_ptr]: the 32-bit form (0F B1)
        // preceded by the 0x66 operand-size prefix; the reg field is fixed to 1.
2148    // [Lock]
        // (only when os::is_MP() reports true; emitted before the 0x66 prefix)
2149    if( os::is_MP() )
2150      emit_opcode(cbuf,0xF0);
2151
2152    // 16-bit mode
2153    emit_opcode(cbuf, 0x66);
2154
2155    // CMPXCHGW [Eptr]
2156    emit_opcode(cbuf,0x0F);
2157    emit_opcode(cbuf,0xB1);
2158    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2159  %}
2160
2161  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
        // Materializes the current flags as 0/1 in res: res ends up 0 when the
        // flags say "not equal" and 1 otherwise.  MOV is used for the zeroing,
        // which keeps the flags intact for the branch that follows.
2162    int res_encoding = $res$$reg;
2163
2164    // MOV  res,0
2165    emit_opcode( cbuf, 0xB8 + res_encoding);
2166    emit_d32( cbuf, 0 );
2167    // JNE,s  fail
        // (displacement 5 = size of the MOV res,1 that is skipped)
2168    emit_opcode(cbuf,0x75);
2169    emit_d8(cbuf, 5 );
2170    // MOV  res,1
2171    emit_opcode( cbuf, 0xB8 + res_encoding);
2172    emit_d32( cbuf, 1 );
2173    // fail:
2174  %}
2175
2176  enc_class set_instruction_start( ) %{
        // Emits no bytes; only records the current position in the code buffer.
2177    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2178  %}
2179
2180  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
        // Emits the addressing bytes for "register, memory operand" through
        // encode_RegMem, passing along the operand's displacement relocation.
        // The opcode is emitted separately.
2181    int reg_encoding = $ereg$$reg;
2182    int base  = $mem$$base;
2183    int index = $mem$$index;
2184    int scale = $mem$$scale;
2185    int displace = $mem$$disp;
2186    relocInfo::relocType disp_reloc = $mem->disp_reloc();
2187    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2188  %}
2189
2190  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
        // High-half counterpart of RegMem: high register of the pair and the
        // memory word 4 bytes further on.
2191    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2192    int base  = $mem$$base;
2193    int index = $mem$$index;
2194    int scale = $mem$$scale;
2195    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
        // A relocated displacement cannot simply be bumped by 4, so it is ruled out.
2196    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2197    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2198  %}
2199
2200  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
        // Long shift by a constant 1..31, as two instructions: a double-precision
        // shift (0F $tertiary) across the pair, then a plain shift ($primary with
        // $secondary in the reg/opcode field) of the remaining half.
        // Which half plays which role depends on the tertiary opcode: with 0xA4,
        // r1 is the low register and r2 the high one; otherwise the reverse.
2201    int r1, r2;
2202    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2203    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2204    emit_opcode(cbuf,0x0F);
2205    emit_opcode(cbuf,$tertiary);
2206    emit_rm(cbuf, 0x3, r1, r2);
2207    emit_d8(cbuf,$cnt$$constant);
        // Second instruction: the primary opcode is emitted here via emit_d8.
2208    emit_d8(cbuf,$primary);
2209    emit_rm(cbuf, 0x3, $secondary, r1);
2210    emit_d8(cbuf,$cnt$$constant);
2211  %}
2212
2213  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
        // Sign-propagating long shift by a constant 32..63: copy the high half
        // into the low half, shift the low half by (cnt - 32) if that is
        // non-zero, then shift the high half by 31.
2214    emit_opcode( cbuf, 0x8B ); // Move
2215    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2216    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2217      emit_d8(cbuf,$primary);
2218      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2219      emit_d8(cbuf,$cnt$$constant-32);
2220    }
        // Shift the high half by 31 using the same primary/secondary opcode.
2221    emit_d8(cbuf,$primary);
2222    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2223    emit_d8(cbuf,31);
2224  %}
2225
2226  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
        // Zero-filling long shift by a constant 32..63: move one half into the
        // other (r2 -> r1), shift r1 by (cnt - 32) if that is non-zero, and
        // clear r2.  With $secondary == 0x5, r1 is the low register and r2 the
        // high one; otherwise the roles are reversed.
2227    int r1, r2;
2228    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2229    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2230
2231    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2232    emit_rm(cbuf, 0x3, r1, r2);
2233    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2234      emit_opcode(cbuf,$primary);
2235      emit_rm(cbuf, 0x3, $secondary, r1);
2236      emit_d8(cbuf,$cnt$$constant-32);
2237    }
2238    emit_opcode(cbuf,0x33);  // XOR r2,r2
2239    emit_rm(cbuf, 0x3, r2, r2);
2240  %}
2241
2242  // Clone of RegMem but accepts an extra parameter to access each
2243  // half of a double in memory; it never needs relocation info.
      // Unlike RegMem it also emits the opcode, taken from the constant operand.
2244  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2245    emit_opcode(cbuf,$opcode$$constant);
2246    int reg_encoding = $rm_reg$$reg;
2247    int base     = $mem$$base;
2248    int index    = $mem$$index;
2249    int scale    = $mem$$scale;
        // disp_for_half is added to the operand's own displacement.
2250    int displace = $mem$$disp + $disp_for_half$$constant;
2251    relocInfo::relocType disp_reloc = relocInfo::none;
2252    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2253  %}
2254
2255  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2256  //
2257  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2258  // and it never needs relocation information.
2259  // Frequently used to move data between FPU's Stack Top and memory.
2260  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2261    int rm_byte_opcode = $rm_opcode$$constant;
2262    int base     = $mem$$base;
2263    int index    = $mem$$index;
2264    int scale    = $mem$$scale;
2265    int displace = $mem$$disp;
        // The operand must not carry a displacement relocation; none is passed on.
2266    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2267    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2268  %}
2269
2270  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
        // Like RMopc_Mem_no_oop, but the operand's displacement relocation is
        // passed on to encode_RegMem instead of being ruled out.
2271    int rm_byte_opcode = $rm_opcode$$constant;
2272    int base     = $mem$$base;
2273    int index    = $mem$$index;
2274    int scale    = $mem$$scale;
2275    int displace = $mem$$disp;
2276    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2277    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2278  %}
2279
2280  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
        // Addressing bytes for "dst, [src0 + src1]": base register plus constant
        // displacement, no index, no scale, no relocation.  The opcode is emitted
        // separately.
2281    int reg_encoding = $dst$$reg;
2282    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2283    int index        = 0x04;            // 0x04 indicates no index
2284    int scale        = 0x00;            // 0x00 indicates no scale
2285    int displace     = $src1$$constant; // 0x00 indicates no displacement
2286    relocInfo::relocType disp_reloc = relocInfo::none;
2287    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2288  %}
2289
2290  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
        // dst = min(dst, src) using a compare and a short branch around a move.
2291    // Compare dst,src
2292    emit_opcode(cbuf,0x3B);
2293    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2294    // jmp dst < src around move
        // (0x7C is the short signed "less" branch; displacement 2 = size of the move)
2295    emit_opcode(cbuf,0x7C);
2296    emit_d8(cbuf,2);
2297    // move dst,src
2298    emit_opcode(cbuf,0x8B);
2299    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2300  %}
2301
2302  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
        // dst = max(dst, src) using a compare and a short branch around a move.
2303    // Compare dst,src
2304    emit_opcode(cbuf,0x3B);
2305    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2306    // jmp dst > src around move
        // (0x7F is the short signed "greater" branch; displacement 2 = size of the move)
2307    emit_opcode(cbuf,0x7F);
2308    emit_d8(cbuf,2);
2309    // move dst,src
2310    emit_opcode(cbuf,0x8B);
2311    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2312  %}
2313
2314  enc_class enc_FPR_store(memory mem, regDPR src) %{
2315    // If src is FPR1, we can just FST to store it.
2316    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2317    int reg_encoding = 0x2; // Just store
2318    int base  = $mem$$base;
2319    int index = $mem$$index;
2320    int scale = $mem$$scale;
2321    int displace = $mem$$disp;
2322    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2323    if( $src$$reg != FPR1L_enc ) {
2324      reg_encoding = 0x3;  // Store & pop
2325      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2326      emit_d8( cbuf, 0xC0-1+$src$$reg );
2327    }
        // The mark is set after the optional FLD so that it points at the store
        // instruction, which is the one carrying the memory operand.
2328    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2329    emit_opcode(cbuf,$primary);
2330    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2331  %}
2332
2333  enc_class neg_reg(rRegI dst) %{
2334    // NEG $dst
        // (opcode 0xF7 with 0x03 in the reg/opcode field of the r/m byte)
2335    emit_opcode(cbuf,0xF7);
2336    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2337  %}
2338
2339  enc_class setLT_reg(eCXRegI dst) %{
2340    // SETLT $dst
        // (two-byte opcode 0F 9C followed by a register-direct r/m byte for dst)
2341    emit_opcode(cbuf,0x0F);
2342    emit_opcode(cbuf,0x9C);
2343    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2344  %}
2345
2346  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
        // Branch-free sequence: p -= q; tmp = borrow ? -1 : 0; tmp &= y; p += tmp.
        // In effect y is added back to p only when the subtraction borrowed.
2347    int tmpReg = $tmp$$reg;
2348
2349    // SUB $p,$q
2350    emit_opcode(cbuf,0x2B);
2351    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2352    // SBB $tmp,$tmp
        // (turns the borrow from the SUB into an all-ones or all-zero mask)
2353    emit_opcode(cbuf,0x1B);
2354    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2355    // AND $tmp,$y
2356    emit_opcode(cbuf,0x23);
2357    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2358    // ADD $p,$tmp
2359    emit_opcode(cbuf,0x03);
2360    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2361  %}
2362
2363  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{    // 64-bit left shift of the dst register pair by the count in ECX
2364    // TEST shift,32  (ECX is hard-coded; the shift operand class is eCXRegI)
2365    emit_opcode(cbuf,0xF7);
2366    emit_rm(cbuf, 0x3, 0, ECX_enc);
2367    emit_d32(cbuf,0x20);
2368    // JEQ,s small  (bit 5 clear: skip the 4 bytes of MOV + CLR below)
2369    emit_opcode(cbuf, 0x74);
2370    emit_d8(cbuf, 0x04);
2371    // MOV    $dst.hi,$dst.lo  (count has bit 5 set: pre-shift by 32)
2372    emit_opcode( cbuf, 0x8B );
2373    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2374    // CLR    $dst.lo  (emitted as XOR lo,lo)
2375    emit_opcode(cbuf, 0x33);
2376    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2377// small:  (target of the JEQ above)
2378    // SHLD   $dst.hi,$dst.lo,$shift  (ModRM: reg field = lo source, r/m field = hi destination)
2379    emit_opcode(cbuf,0x0F);
2380    emit_opcode(cbuf,0xA5);
2381    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2382    // SHL    $dst.lo,$shift  (0xD3 with 0x4 in the reg field)
2383    emit_opcode(cbuf,0xD3);
2384    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2385  %}
2386
2387  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{    // 64-bit logical right shift of the dst register pair by the count in ECX
2388    // TEST shift,32  (ECX is hard-coded; the shift operand class is eCXRegI)
2389    emit_opcode(cbuf,0xF7);
2390    emit_rm(cbuf, 0x3, 0, ECX_enc);
2391    emit_d32(cbuf,0x20);
2392    // JEQ,s small  (bit 5 clear: skip the 4 bytes of MOV + CLR below)
2393    emit_opcode(cbuf, 0x74);
2394    emit_d8(cbuf, 0x04);
2395    // MOV    $dst.lo,$dst.hi  (count has bit 5 set: pre-shift by 32)
2396    emit_opcode( cbuf, 0x8B );
2397    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2398    // CLR    $dst.hi  (emitted as XOR hi,hi; zero-fills the upper half)
2399    emit_opcode(cbuf, 0x33);
2400    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2401// small:  (target of the JEQ above)
2402    // SHRD   $dst.lo,$dst.hi,$shift  (ModRM: reg field = hi source, r/m field = lo destination)
2403    emit_opcode(cbuf,0x0F);
2404    emit_opcode(cbuf,0xAD);
2405    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2406    // SHR    $dst.hi,$shift  (0xD3 with 0x5 in the reg field)
2407    emit_opcode(cbuf,0xD3);
2408    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2409  %}
2410
2411  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{    // 64-bit arithmetic right shift of the dst register pair by the count in ECX
2412    // TEST shift,32  (ECX is hard-coded; the shift operand class is eCXRegI)
2413    emit_opcode(cbuf,0xF7);
2414    emit_rm(cbuf, 0x3, 0, ECX_enc);
2415    emit_d32(cbuf,0x20);
2416    // JEQ,s small  (bit 5 clear: skip the 5 bytes of MOV (2) + SAR imm8 (3) below)
2417    emit_opcode(cbuf, 0x74);
2418    emit_d8(cbuf, 0x05);
2419    // MOV    $dst.lo,$dst.hi  (count has bit 5 set: pre-shift by 32)
2420    emit_opcode( cbuf, 0x8B );
2421    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2422    // SAR    $dst.hi,31  (fills the upper half with copies of the sign bit)
2423    emit_opcode(cbuf, 0xC1);
2424    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2425    emit_d8(cbuf, 0x1F );
2426// small:  (target of the JEQ above)
2427    // SHRD   $dst.lo,$dst.hi,$shift  (ModRM: reg field = hi source, r/m field = lo destination)
2428    emit_opcode(cbuf,0x0F);
2429    emit_opcode(cbuf,0xAD);
2430    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2431    // SAR    $dst.hi,$shift  (0xD3 with 0x7 in the reg field)
2432    emit_opcode(cbuf,0xD3);
2433    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2434  %}
2435
2436
2437  // ----------------- Encodings for floating point unit -----------------
2438  // May leave result in FPU-TOS or FPU reg depending on opcodes
2439  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV: two-byte FPU op built from the instruct's $primary/$secondary
2440    $$$emit8$primary;  // ADLC shorthand; presumably emits $primary as a single opcode byte -- TODO confirm
2441    emit_rm(cbuf, 0x3, $secondary, $src$$reg );  // second byte: mod=3, reg field=$secondary, r/m field=src
2442  %}
2443
2444  // Pop argument in FPR0 with FSTP ST(0); takes no operands and always emits the fixed bytes DD D8
2445  enc_class PopFPU() %{
2446    emit_opcode( cbuf, 0xDD );
2447    emit_d8( cbuf, 0xD8 );   // 0xD8+0, i.e. the ST(0) form of the FSTP ST(i) encoding
2448  %}
2449
2450  // !!!!! equivalent to Pop_Reg_F (Pop_Reg_FPR in this file emits the same two bytes)
2451  enc_class Pop_Reg_DPR( regDPR dst ) %{
2452    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2453    emit_d8( cbuf, 0xD8+$dst$$reg );     // i is the raw encoding of dst
2454  %}
2455
2456  enc_class Push_Reg_DPR( regDPR dst ) %{    // Push a copy of FPU register dst onto the FPU stack
2457    emit_opcode( cbuf, 0xD9 );
2458    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1), where i is the encoding of dst
2459  %}
2460
2461  enc_class strictfp_bias1( regDPR dst ) %{    // Multiply dst by the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias1()
2462    emit_opcode( cbuf, 0xDB );           // FLD m80real
2463    emit_opcode( cbuf, 0x2D );           // ModRM selecting an absolute 32-bit address operand
2464    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );  // address of the constant, embedded as a 32-bit immediate
2465    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2466    emit_opcode( cbuf, 0xC8+$dst$$reg ); // multiply into dst and pop the constant pushed above
2467  %}
2468
2469  enc_class strictfp_bias2( regDPR dst ) %{    // Multiply dst by the 80-bit constant at StubRoutines::addr_fpu_subnormal_bias2()
2470    emit_opcode( cbuf, 0xDB );           // FLD m80real
2471    emit_opcode( cbuf, 0x2D );           // ModRM selecting an absolute 32-bit address operand
2472    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );  // address of the constant, embedded as a 32-bit immediate
2473    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2474    emit_opcode( cbuf, 0xC8+$dst$$reg ); // multiply into dst and pop the constant pushed above
2475  %}
2476
2477  // Special case for moving an integer register to a stack slot.
2478  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS, but also emits the opcode ($primary)
2479    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );  // helper is given the opcode, the register field and the slot displacement
2480  %}
2481
2482  // Special case for moving a register to a stack slot.
2483  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS: operand bytes only, addressing [ESP + disp32]
2484    // Opcode already emitted
2485    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte: mod=2 (32-bit displacement), reg=src, r/m=ESP (SIB follows)
2486    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte: scale 0, index field=ESP, base=ESP
2487    emit_d32(cbuf, $dst$$disp);   // Displacement of the stack slot
2488  %}
2489
2490  // Push the integer in stackSlot 'src' onto FP-stack
2491  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2492    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );  // opcode and register field both come from the using instruct
2493  %}
2494
2495  // Store FPU's TOS float to a stack-slot, and pop FPU-stack
2496  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2497    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );  // opcode 0xD9, register field 0x03
2498  %}
2499
2500  // Same as Pop_Mem_F except for opcode (0xDD here instead of 0xD9)
2501  // Store FPU's TOS double to a stack-slot, and pop FPU-stack
2502  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2503    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );  // opcode 0xDD, register field 0x03
2504  %}
2505
2506  enc_class Pop_Reg_FPR( regFPR dst ) %{    // Store FPU TOS into dst and pop the FPU stack
2507    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2508    emit_d8( cbuf, 0xD8+$dst$$reg );     // i is the raw encoding of dst
2509  %}
2510
2511  enc_class Push_Reg_FPR( regFPR dst ) %{    // Push a copy of FPU register dst onto the FPU stack
2512    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2513    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // i is the encoding of dst
2514  %}
2515
2516  // Store FPU register src as a float to a stack-slot, leaving the FPU stack depth unchanged
2517  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2518    int pop = 0x02;                      // default: plain FST (no pop) when src is already FPR1
2519    if ($src$$reg != FPR1L_enc) {
2520      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2521      emit_d8( cbuf, 0xC0-1+$src$$reg );
2522      pop = 0x03;                        // use FSTP so the copy pushed above is popped again
2523    }
2524    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2525  %}
2526
2527  // Store FPU register src as a double to a stack-slot, leaving the FPU stack depth unchanged
2528  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2529    int pop = 0x02;                      // default: plain FST (no pop) when src is already FPR1
2530    if ($src$$reg != FPR1L_enc) {
2531      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2532      emit_d8( cbuf, 0xC0-1+$src$$reg );
2533      pop = 0x03;                        // use FSTP so the copy pushed above is popped again
2534    }
2535    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2536  %}
2537
2538  // Copy FPU register src into FPU register dst, leaving the FPU stack depth unchanged
2539  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2540    int pop = 0xD0 - 1; // -1 since we skip FLD (no push, so dst's stack index is one lower); 0xD0 base = non-popping FST
2541    if ($src$$reg != FPR1L_enc) {
2542      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2543      emit_d8( cbuf, 0xC0-1+$src$$reg );
2544      pop = 0xD8;                        // 0xD8 base = FSTP, popping the copy pushed above
2545    }
2546    emit_opcode( cbuf, 0xDD );
2547    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2548  %}
2549
2550
2551  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{    // Push dst onto the FPU stack and, if needed, swap src into the slot below it
2552    // load dst in FPR0  (FLD ST(dst-1))
2553    emit_opcode( cbuf, 0xD9 );
2554    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2555    if ($src$$reg != FPR1L_enc) {
2556      // fincstp  (step the FPU stack pointer past the value just pushed)
2557      emit_opcode (cbuf, 0xD9);
2558      emit_opcode (cbuf, 0xF7);
2559      // swap src with FPR1:
2560      // FXCH FPR1 with src
2561      emit_opcode(cbuf, 0xD9);
2562      emit_d8(cbuf, 0xC8-1+$src$$reg );
2563      // fdecstp  (step back so the pushed dst is on top again)
2564      emit_opcode (cbuf, 0xD9);
2565      emit_opcode (cbuf, 0xF6);
2566    }
2567  %}
2568
2569  enc_class Push_ModD_encoding(regD src0, regD src1) %{    // Load two XMM doubles onto the FPU stack: src1 first, then src0 on top
2570    MacroAssembler _masm(&cbuf);
2571    __ subptr(rsp, 8);                                  // reserve an 8-byte scratch slot; it is NOT released in this encoding
2572    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);     // spill src1 to the scratch slot
2573    __ fld_d(Address(rsp, 0));                          // and push it onto the FPU stack
2574    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);     // reuse the same slot for src0
2575    __ fld_d(Address(rsp, 0));                          // src0 ends up on top, src1 just below it
2576  %}
2577
2578  enc_class Push_ModF_encoding(regF src0, regF src1) %{    // Load two XMM floats onto the FPU stack: src1 first, then src0 on top
2579    MacroAssembler _masm(&cbuf);
2580    __ subptr(rsp, 4);                                  // reserve a 4-byte scratch slot; it is NOT released in this encoding
2581    __ movflt(Address(rsp, 0), $src1$$XMMRegister);     // spill src1 to the scratch slot
2582    __ fld_s(Address(rsp, 0));                          // and push it onto the FPU stack
2583    __ movflt(Address(rsp, 0), $src0$$XMMRegister);     // reuse the same slot for src0
2584    __ fld_s(Address(rsp, 0));                          // src0 ends up on top, src1 just below it
2585  %}
2586
2587  enc_class Push_ResultD(regD dst) %{    // Move the double on the FPU stack top into XMM dst through [rsp], then free 8 bytes of stack
2588    MacroAssembler _masm(&cbuf);
2589    __ fstp_d(Address(rsp, 0));                       // store and pop the FPU top into the scratch slot at [rsp]
2590    __ movdbl($dst$$XMMRegister, Address(rsp, 0));    // reload it into the XMM register
2591    __ addptr(rsp, 8);                                // release the scratch slot (reserved by an earlier encoding)
2592  %}
2593
2594  enc_class Push_ResultF(regF dst, immI d8) %{    // Move the float on the FPU stack top into XMM dst through [rsp], then free d8 bytes of stack
2595    MacroAssembler _masm(&cbuf);
2596    __ fstp_s(Address(rsp, 0));                       // store and pop the FPU top into the scratch slot at [rsp]
2597    __ movflt($dst$$XMMRegister, Address(rsp, 0));    // reload it into the XMM register
2598    __ addptr(rsp, $d8$$constant);                    // release the amount given by the d8 immediate
2599  %}
2600
2601  enc_class Push_SrcD(regD src) %{    // Load XMM double src onto the FPU stack through a fresh 8-byte stack slot
2602    MacroAssembler _masm(&cbuf);
2603    __ subptr(rsp, 8);                                 // reserve the scratch slot; it is NOT released in this encoding
2604    __ movdbl(Address(rsp, 0), $src$$XMMRegister);     // spill src
2605    __ fld_d(Address(rsp, 0));                         // push it onto the FPU stack
2606  %}
2607
2608  enc_class push_stack_temp_qword() %{    // Reserve an 8-byte temporary on the CPU stack
2609    MacroAssembler _masm(&cbuf);
2610    __ subptr(rsp, 8);   // counterpart of the addptr in pop_stack_temp_qword
2611  %}
2612
2613  enc_class pop_stack_temp_qword() %{    // Release the 8-byte temporary on the CPU stack
2614    MacroAssembler _masm(&cbuf);
2615    __ addptr(rsp, 8);   // counterpart of the subptr in push_stack_temp_qword
2616  %}
2617
2618  enc_class push_xmm_to_fpr1(regD src) %{    // Load XMM double src onto the FPU stack through the slot at [rsp]
2619    MacroAssembler _masm(&cbuf);
2620    __ movdbl(Address(rsp, 0), $src$$XMMRegister);   // overwrites 8 bytes at [rsp]; rsp is not adjusted here -- presumably paired with push_stack_temp_qword, verify at the use sites
2621    __ fld_d(Address(rsp, 0));                       // push the spilled value onto the FPU stack
2622  %}
2623
2624  enc_class Push_Result_Mod_DPR( regDPR src) %{    // Swap src back with FPR1 (same FXCH sequence as in Push_Reg_Mod_DPR); emits nothing if src is FPR1
2625    if ($src$$reg != FPR1L_enc) {
2626      // fincstp  (step the FPU stack pointer past the top entry)
2627      emit_opcode (cbuf, 0xD9);
2628      emit_opcode (cbuf, 0xF7);
2629      // FXCH FPR1 with src
2630      emit_opcode(cbuf, 0xD9);
2631      emit_d8(cbuf, 0xC8-1+$src$$reg );
2632      // fdecstp  (step back to the original top)
2633      emit_opcode (cbuf, 0xD9);
2634      emit_opcode (cbuf, 0xF6);
2635    }
2636    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2637    // // FSTP   FPR$dst$$reg
2638    // emit_opcode( cbuf, 0xDD );
2639    // emit_d8( cbuf, 0xD8+$dst$$reg );
2640  %}
2641
2642  enc_class fnstsw_sahf_skip_parity() %{    // Copy FPU status flags into EFLAGS, then jump over the next 5 bytes when parity is clear
2643    // fnstsw ax
2644    emit_opcode( cbuf, 0xDF );
2645    emit_opcode( cbuf, 0xE0 );
2646    // sahf
2647    emit_opcode( cbuf, 0x9E );
2648    // jnp  ::skip  (short jump; the 5 skipped bytes are emitted by whatever encoding follows this one)
2649    emit_opcode( cbuf, 0x7B );
2650    emit_opcode( cbuf, 0x05 );
2651  %}
2652
2653  enc_class emitModDPR() %{    // Emit a loop that repeats FPREM until the parity flag (copied from the FPU status word) is clear
2654    // fprem must be iterative
2655    // :: loop
2656    // fprem
2657    emit_opcode( cbuf, 0xD9 );
2658    emit_opcode( cbuf, 0xF8 );
2659    // wait
2660    emit_opcode( cbuf, 0x9b );
2661    // fnstsw ax
2662    emit_opcode( cbuf, 0xDF );
2663    emit_opcode( cbuf, 0xE0 );
2664    // sahf
2665    emit_opcode( cbuf, 0x9E );
2666    // jp  ::loop  (near form 0F 8A + rel32; 0xFFFFFFF4 = -12 = the 12 bytes emitted here, i.e. back to fprem)
2667    emit_opcode( cbuf, 0x0F );
2668    emit_opcode( cbuf, 0x8A );
2669    emit_opcode( cbuf, 0xF4 );
2670    emit_opcode( cbuf, 0xFF );
2671    emit_opcode( cbuf, 0xFF );
2672    emit_opcode( cbuf, 0xFF );
2673  %}
2674
2675  enc_class fpu_flags() %{    // Move FPU compare status into EFLAGS, forcing the "less than" (carry) result for unordered compares
2676    // fnstsw_ax
2677    emit_opcode( cbuf, 0xDF);
2678    emit_opcode( cbuf, 0xE0);
2679    // test ax,0x0400  (bit 10 of the status word; set means unordered per the comments below)
2680    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2681    emit_opcode( cbuf, 0xA9 );
2682    emit_d16   ( cbuf, 0x0400 );
2683    // // // This sequence works, but stalls for 12-16 cycles on PPro
2684    // // test rax,0x0400
2685    // emit_opcode( cbuf, 0xA9 );
2686    // emit_d32   ( cbuf, 0x00000400 );
2687    //
2688    // jz exit (no unordered comparison): skips only the 2-byte MOV below, landing on SAHF
2689    emit_opcode( cbuf, 0x74 );
2690    emit_d8    ( cbuf, 0x02 );
2691    // mov ah,1 - treat as LT case (set carry flag)
2692    emit_opcode( cbuf, 0xB4 );
2693    emit_d8    ( cbuf, 0x01 );
2694    // sahf  (executed on both paths)
2695    emit_opcode( cbuf, 0x9E);
2696  %}
2697
2698  enc_class cmpF_P6_fixup() %{    // After a flag-setting FP compare: rewrite EFLAGS to the "less than" state if the compare was unordered
2699    // Fixup the integer flags in case comparison involved a NaN
2700    //
2701    // JNP exit (no unordered comparison, P-flag is set by NaN); +3 skips MOV (2 bytes) + SAHF (1 byte)
2702    emit_opcode( cbuf, 0x7B );
2703    emit_d8    ( cbuf, 0x03 );
2704    // MOV AH,1 - treat as LT case (set carry flag)
2705    emit_opcode( cbuf, 0xB4 );
2706    emit_d8    ( cbuf, 0x01 );
2707    // SAHF
2708    emit_opcode( cbuf, 0x9E);
2709    // NOP     // target for branch to avoid branch to branch
2710    emit_opcode( cbuf, 0x90);
2711  %}
2712
2713//     fnstsw_ax();
2714//     sahf();
2715//     movl(dst, nan_result);
2716//     jcc(Assembler::parity, exit);
2717//     movl(dst, less_result);
2718//     jcc(Assembler::below, exit);
2719//     movl(dst, equal_result);
2720//     jcc(Assembler::equal, exit);
2721//     movl(dst, greater_result);
2722
2723// less_result     = -1;
2724// greater_result  =  1;
2725// equal_result    =  0;
2726// nan_result      = -1;
2727
2728  enc_class CmpF_Result(rRegI dst) %{    // Turn the FPU compare status into an int in dst: -1 (NaN or less), 0 (equal), 1 (greater)
2729    // fnstsw_ax();
2730    emit_opcode( cbuf, 0xDF);
2731    emit_opcode( cbuf, 0xE0);
2732    // sahf
2733    emit_opcode( cbuf, 0x9E);
2734    // movl(dst, nan_result);  each MOV r32,imm32 below is 5 bytes and leaves the flags untouched
2735    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2736    emit_d32( cbuf, -1 );
2737    // jcc(Assembler::parity, exit);  0x13 = 19 bytes = three MOVs (15) + two short jumps (4)
2738    emit_opcode( cbuf, 0x7A );
2739    emit_d8    ( cbuf, 0x13 );
2740    // movl(dst, less_result);
2741    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2742    emit_d32( cbuf, -1 );
2743    // jcc(Assembler::below, exit);  0x0C = 12 bytes = two MOVs (10) + one short jump (2)
2744    emit_opcode( cbuf, 0x72 );
2745    emit_d8    ( cbuf, 0x0C );
2746    // movl(dst, equal_result);
2747    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2748    emit_d32( cbuf, 0 );
2749    // jcc(Assembler::equal, exit);  0x05 = the final 5-byte MOV
2750    emit_opcode( cbuf, 0x74 );
2751    emit_d8    ( cbuf, 0x05 );
2752    // movl(dst, greater_result);
2753    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2754    emit_d32( cbuf, 1 );
2755  %}
2756
2757
2758  // Compare the longs and set flags: high halves first, low halves only when the high halves are equal
2759  // BROKEN!  Do Not use as-is
2760  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2761    // CMP    $src1.hi,$src2.hi
2762    emit_opcode( cbuf, 0x3B );
2763    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2764    // JNE,s  done  (+2 skips the 2-byte CMP below)
2765    emit_opcode(cbuf,0x75);
2766    emit_d8(cbuf, 2 );
2767    // CMP    $src1.lo,$src2.lo
2768    emit_opcode( cbuf, 0x3B );
2769    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2770// done:
2771  %}
2772
2773  enc_class convert_int_long( regL dst, rRegI src ) %{    // Sign-extend int src into the long register pair dst
2774    // mov $dst.lo,$src
2775    int dst_encoding = $dst$$reg;
2776    int src_encoding = $src$$reg;
2777    encode_Copy( cbuf, dst_encoding  , src_encoding );
2778    // mov $dst.hi,$src
2779    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2780    // sar $dst.hi,31  (leaves the high half as 32 copies of the sign bit)
2781    emit_opcode( cbuf, 0xC1 );
2782    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2783    emit_d8(cbuf, 0x1F );
2784  %}
2785
2786  enc_class convert_long_double( eRegL src ) %{    // Load long src onto the FPU stack through a temporary on the CPU stack
2787    // push $src.hi  (PUSH r32 is the single byte 0x50 + register encoding)
2788    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2789    // push $src.lo  (pushed second, so it sits at the lower address)
2790    emit_opcode(cbuf, 0x50+$src$$reg  );
2791    // fild 64-bits at [SP]
2792    emit_opcode(cbuf,0xdf);
2793    emit_d8(cbuf, 0x6C);   // ModRM: 8-bit displacement, SIB byte follows
2794    emit_d8(cbuf, 0x24);   // SIB: base ESP, no index
2795    emit_d8(cbuf, 0x00);   // displacement 0
2796    // pop stack
2797    emit_opcode(cbuf, 0x83); // add  SP, #8
2798    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2799    emit_d8(cbuf, 0x8);
2800  %}
2801
2802  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{    // High word of a widening multiply, shifted right by cnt-32
2803    // IMUL   EDX:EAX,$src1  (one-operand form: 0xF7 with 0x5 in the reg field)
2804    emit_opcode( cbuf, 0xF7 );
2805    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2806    // SAR    EDX,$cnt-32  (taking EDX already accounts for a shift by 32)
2807    int shift_count = ((int)$cnt$$constant) - 32;
2808    if (shift_count > 0) {   // cnt == 32 needs no further shift, so nothing is emitted
2809      emit_opcode(cbuf, 0xC1);
2810      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2811      emit_d8(cbuf, shift_count);
2812    }
2813  %}
2814
2815  // this version doesn't have add sp, 8: the 8 pushed bytes stay on the CPU stack for a later encoding to release
2816  enc_class convert_long_double2( eRegL src ) %{
2817    // push $src.hi  (PUSH r32 is the single byte 0x50 + register encoding)
2818    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2819    // push $src.lo
2820    emit_opcode(cbuf, 0x50+$src$$reg  );
2821    // fild 64-bits at [SP]
2822    emit_opcode(cbuf,0xdf);
2823    emit_d8(cbuf, 0x6C);   // ModRM: 8-bit displacement, SIB byte follows
2824    emit_d8(cbuf, 0x24);   // SIB: base ESP, no index
2825    emit_d8(cbuf, 0x00);   // displacement 0
2826  %}
2827
2828  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{    // Signed 32x32 -> 64 multiply into EDX:EAX
2829    // Basic idea: long = (long)int * (long)int
2830    // IMUL EDX:EAX, src  (one-operand form: 0xF7 with 0x5 in the reg field)
2831    emit_opcode( cbuf, 0xF7 );
2832    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2833  %}
2834
2835  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{    // Unsigned 32x32 -> 64 multiply into EDX:EAX
2836    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2837    // MUL EDX:EAX, src  (0xF7 with 0x4 in the reg field)
2838    emit_opcode( cbuf, 0xF7 );
2839    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2840  %}
2841
2842  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{    // 64x64 -> 64 multiply: dst (EDX:EAX) *= src, using tmp for the cross terms
2843    // Basic idea: lo(result) = lo(x_lo * y_lo)
2844    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2845    // MOV    $tmp,$src.lo
2846    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2847    // IMUL   $tmp,EDX  (tmp = src.lo * dst.hi)
2848    emit_opcode( cbuf, 0x0F );
2849    emit_opcode( cbuf, 0xAF );
2850    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2851    // MOV    EDX,$src.hi
2852    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2853    // IMUL   EDX,EAX  (EDX = src.hi * dst.lo)
2854    emit_opcode( cbuf, 0x0F );
2855    emit_opcode( cbuf, 0xAF );
2856    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2857    // ADD    $tmp,EDX  (tmp = sum of both cross terms)
2858    emit_opcode( cbuf, 0x03 );
2859    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2860    // MUL   EDX:EAX,$src.lo  (unsigned full product of the low halves)
2861    emit_opcode( cbuf, 0xF7 );
2862    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2863    // ADD    EDX,$tmp  (fold the cross terms into the high half)
2864    emit_opcode( cbuf, 0x03 );
2865    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2866  %}
2867
2868  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{    // Multiply the long in EDX:EAX by a small constant (operand class immL_127)
2869    // Basic idea: lo(result) = lo(src * y_lo)
2870    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2871    // IMUL   $tmp,EDX,$src  (8-bit immediate form 0x6B: tmp = dst.hi * src)
2872    emit_opcode( cbuf, 0x6B );
2873    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2874    emit_d8( cbuf, (int)$src$$constant );
2875    // MOV    EDX,$src  (EDX's old value was consumed by the IMUL above)
2876    emit_opcode(cbuf, 0xB8 + EDX_enc);
2877    emit_d32( cbuf, (int)$src$$constant );
2878    // MUL   EDX:EAX,EDX  (unsigned full product of dst.lo and the constant)
2879    emit_opcode( cbuf, 0xF7 );
2880    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2881    // ADD    EDX,$tmp  (fold the high-half product into the result)
2882    emit_opcode( cbuf, 0x03 );
2883    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2884  %}
2885
2886  enc_class long_div( eRegL src1, eRegL src2 ) %{    // Push both longs and call SharedRuntime::ldiv, then drop the 16 bytes of arguments
2887    // PUSH src1.hi  (PUSH r32 is 0x50 + register encoding; HIGH_FROM_LOW is applied to the register, as everywhere else in this file)
2888    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2889    // PUSH src1.lo
2890    emit_opcode(cbuf,               0x50+$src1$$reg  );
2891    // PUSH src2.hi
2892    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2893    // PUSH src2.lo
2894    emit_opcode(cbuf,               0x50+$src2$$reg  );
2895    // CALL directly to the runtime
2896    cbuf.set_insts_mark();        // mark the start of the CALL for its relocation
2897    emit_opcode(cbuf,0xE8);       // Call into runtime
2898    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );  // rel32 is measured from the end of the 4-byte displacement
2899    // Restore stack
2900    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2901    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2902    emit_d8(cbuf, 4*4);      // four 4-byte pushes
2903  %}
2904
2905  enc_class long_mod( eRegL src1, eRegL src2 ) %{    // Push both longs and call SharedRuntime::lrem, then drop the 16 bytes of arguments
2906    // PUSH src1.hi  (PUSH r32 is 0x50 + register encoding; HIGH_FROM_LOW is applied to the register, as everywhere else in this file)
2907    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src1$$reg) );
2908    // PUSH src1.lo
2909    emit_opcode(cbuf,               0x50+$src1$$reg  );
2910    // PUSH src2.hi
2911    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src2$$reg) );
2912    // PUSH src2.lo
2913    emit_opcode(cbuf,               0x50+$src2$$reg  );
2914    // CALL directly to the runtime
2915    cbuf.set_insts_mark();        // mark the start of the CALL for its relocation
2916    emit_opcode(cbuf,0xE8);       // Call into runtime
2917    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );  // rel32 is measured from the end of the 4-byte displacement
2918    // Restore stack
2919    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2920    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2921    emit_d8(cbuf, 4*4);      // four 4-byte pushes
2922  %}
2923
2924  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{    // Set flags for "long src == 0": tmp = src.lo | src.hi, so ZF is set iff both halves are zero
2925    // MOV   $tmp,$src.lo
2926    emit_opcode(cbuf, 0x8B);
2927    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2928    // OR    $tmp,$src.hi
2929    emit_opcode(cbuf, 0x0B);
2930    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2931  %}
2932
2933  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{    // Set flags for long equality: compare low halves, then high halves only if the low halves match
2934    // CMP    $src1.lo,$src2.lo
2935    emit_opcode( cbuf, 0x3B );
2936    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2937    // JNE,s  skip  (emit_cc is given base 0x70 and condition 0x5; +2 skips the 2-byte CMP below)
2938    emit_cc(cbuf, 0x70, 0x5);
2939    emit_d8(cbuf,2);
2940    // CMP    $src1.hi,$src2.hi
2941    emit_opcode( cbuf, 0x3B );
2942    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2943  %}
2944
2945  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{    // Set flags as for the 64-bit subtraction src1 - src2, clobbering only tmp
2946    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2947    emit_opcode( cbuf, 0x3B );
2948    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2949    // MOV    $tmp,$src1.hi  (MOV does not disturb the borrow from the CMP)
2950    emit_opcode( cbuf, 0x8B );
2951    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2952    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2953    emit_opcode( cbuf, 0x1B );
2954    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2955  %}
2956
2957  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{    // Set flags as for the 64-bit subtraction 0 - src, clobbering only tmp
2958    // XOR    $tmp,$tmp  (tmp = 0)
2959    emit_opcode(cbuf,0x33);  // XOR
2960    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2961    // CMP    $tmp,$src.lo  (0 - src.lo; produces the borrow for the high half)
2962    emit_opcode( cbuf, 0x3B );
2963    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2964    // SBB    $tmp,$src.hi  (0 - src.hi - borrow)
2965    emit_opcode( cbuf, 0x1B );
2966    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2967  %}
2968
2969 // Sniff, sniff... smells like Gnu Superoptimizer.  Negates the long register pair dst in place.
2970  enc_class neg_long( eRegL dst ) %{
2971    emit_opcode(cbuf,0xF7);    // NEG hi
2972    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2973    emit_opcode(cbuf,0xF7);    // NEG lo  (sets carry when lo was non-zero)
2974    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2975    emit_opcode(cbuf,0x83);    // SBB hi,0  (subtracts that carry from the already-negated hi)
2976    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2977    emit_d8    (cbuf,0 );
2978  %}
2979
2980  enc_class enc_pop_rdx() %{    // Emit the single byte 0x5A
2981    emit_opcode(cbuf,0x5A);     // POP EDX
2982  %}
2983
2984  enc_class enc_rethrow() %{    // Jump (not call) to OptoRuntime::rethrow_stub()
2985    cbuf.set_insts_mark();          // mark the start of the JMP for its relocation
2986    emit_opcode(cbuf, 0xE9);        // jmp    entry
2987    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,   // rel32 measured from the end of the 4-byte displacement
2988                   runtime_call_Relocation::spec(), RELOC_IMM32 );
2989  %}
2990
2991
2992  // Convert a double to an int.  Java semantics require we do complex
2993  // manglelations in the corner cases.  So we set the rounding mode to
2994  // 'zero', store the darned double down as an int, and reset the
2995  // rounding mode to 'nearest'.  If the stored result is 0x80000000 (the
2996  // value Intel stores in the corner cases), the d2i_wrapper stub is called.
2997  enc_class DPR2I_encoding( regDPR src ) %{
2998    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2999    // exceptions here, so that a NAN or other corner-case value will
3000    // throw an exception (but normal values get converted at full speed).
3001    // However, I2C adapters and other float-stack manglers leave pending
3002    // invalid-op exceptions hanging.  We would have to clear them before
3003    // enabling them and that is more expensive than just testing for the
3004    // invalid value Intel stores down in the corner cases.
3005    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3006    emit_opcode(cbuf,0x2D);
3007    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3008    // Allocate a word
3009    emit_opcode(cbuf,0x83);            // SUB ESP,4
3010    emit_opcode(cbuf,0xEC);
3011    emit_d8(cbuf,0x04);
3012    // Encoding assumes a double has been pushed into FPR0.
3013    // Store down the double as an int, popping the FPU stack
3014    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3015    emit_opcode(cbuf,0x1C);
3016    emit_d8(cbuf,0x24);
3017    // Restore the rounding mode; mask the exception
3018    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3019    emit_opcode(cbuf,0x2D);
3020    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3021        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3022        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3023
3024    // Load the converted int; adjust CPU stack
3025    emit_opcode(cbuf,0x58);       // POP EAX
3026    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3027    emit_d32   (cbuf,0x80000000); //         0x80000000
3028    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3029    emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
3030    // Push src onto stack slow-path
3031    emit_opcode(cbuf,0xD9 );      // FLD     ST(i-1), i = encoding of src
3032    emit_d8    (cbuf,0xC0-1+$src$$reg );
3033    // CALL directly to the runtime
3034    cbuf.set_insts_mark();
3035    emit_opcode(cbuf,0xE8);       // Call into runtime
3036    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3037    // Carry on here...
3038  %}
3039
3040  enc_class DPR2L_encoding( regDPR src ) %{    // Convert a double to a long in EDX:EAX; same scheme as DPR2I_encoding, with d2l_wrapper as the slow path
3041    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3042    emit_opcode(cbuf,0x2D);
3043    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3044    // Allocate two words (8 bytes) for the long result
3045    emit_opcode(cbuf,0x83);            // SUB ESP,8
3046    emit_opcode(cbuf,0xEC);
3047    emit_d8(cbuf,0x08);
3048    // Encoding assumes a double has been pushed into FPR0.
3049    // Store down the double as a long, popping the FPU stack
3050    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3051    emit_opcode(cbuf,0x3C);
3052    emit_d8(cbuf,0x24);
3053    // Restore the rounding mode; mask the exception
3054    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3055    emit_opcode(cbuf,0x2D);
3056    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3057        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3058        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3059
3060    // Load the converted long (low word first); adjust CPU stack
3061    emit_opcode(cbuf,0x58);       // POP EAX
3062    emit_opcode(cbuf,0x5A);       // POP EDX
3063    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3064    emit_d8    (cbuf,0xFA);       // rdx
3065    emit_d32   (cbuf,0x80000000); //         0x80000000
3066    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3067    emit_d8    (cbuf,0x07+4);     // Size of slow_call plus the 4 bytes of TEST + JNE below
3068    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3069    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3070    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3071    emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2 bytes) + CALL rel32 (5 bytes)
3072    // Push src onto stack slow-path
3073    emit_opcode(cbuf,0xD9 );      // FLD     ST(i-1), i = encoding of src
3074    emit_d8    (cbuf,0xC0-1+$src$$reg );
3075    // CALL directly to the runtime
3076    cbuf.set_insts_mark();
3077    emit_opcode(cbuf,0xE8);       // Call into runtime
3078    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3079    // Carry on here...
3080  %}
3081
3082  enc_class FMul_ST_reg( eRegFPR src1 ) %{    // Multiply the FPU stack top by src1; result stays on the stack top
3083    // Operand was loaded from memory into fp ST (stack top)
3084    // FMUL   ST,$src  /* D8 C8+i */
3085    emit_opcode(cbuf, 0xD8);
3086    emit_opcode(cbuf, 0xC8 + $src1$$reg);  // i is the raw encoding of src1
3087  %}
3088
3089  enc_class FAdd_ST_reg( eRegFPR src2 ) %{    // Add src2 to the FPU stack top; result stays on the stack top (no pop)
3090    // FADD   ST,src2  /* D8 C0+i */
3091    emit_opcode(cbuf, 0xD8);
3092    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3093    //could use FADDP  src2,fpST  /* DE C0+i */
3094  %}
3095
3096  enc_class FAddP_reg_ST( eRegFPR src2 ) %{    // Add the FPU stack top into src2, then pop the stack
3097    // FADDP  src2,ST  /* DE C0+i */
3098    emit_opcode(cbuf, 0xDE);
3099    emit_opcode(cbuf, 0xC0 + $src2$$reg);  // i is the raw encoding of src2
3100  %}
3101
3102  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{    // ST = (ST - src1) / src2, both steps on the FPU stack top
3103    // Operand has been loaded into fp ST (stack top)
3104      // FSUB   ST,$src1  /* D8 E0+i */
3105      emit_opcode(cbuf, 0xD8);
3106      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3107
3108      // FDIV   ST,$src2  /* D8 F0+i */
3109      emit_opcode(cbuf, 0xD8);
3110      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3111  %}
3112
3113  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{    // ST = (ST + src1) * src2; note the add is emitted first despite the name
3114    // Operand was loaded from memory into fp ST (stack top)
3115    // FADD   ST,$src1  /* D8 C0+i */
3116    emit_opcode(cbuf, 0xD8);
3117    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3118
3119    // FMUL  ST,src2  /* D8 C8+i */
3120    emit_opcode(cbuf, 0xD8);
3121    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3122  %}
3123
3124
3124  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{    // Like MulFAddF, but the product is written to src2 and the stack top is popped
3125    // Operand was loaded from memory into fp ST (stack top)
3126    // FADD   ST,$src1  /* D8 C0+i */
3127    emit_opcode(cbuf, 0xD8);
3128    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3129
3130    // FMULP  src2,ST  /* DE C8+i */
3131    emit_opcode(cbuf, 0xDE);
3132    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3133  %}
3135
3135  // Atomically load the volatile long: one 64-bit FPU load from mem, then a 64-bit FPU store-and-pop to the stack slot
3136  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3137    emit_opcode(cbuf,0xDF);            // NOTE(review): unlike enc_storeL_volatile, no set_insts_mark() precedes this opcode -- confirm that is intended
3138    int rm_byte_opcode = 0x05;         // register field 5 with opcode 0xDF: 64-bit integer load onto the FPU stack
3139    int base     = $mem$$base;
3140    int index    = $mem$$index;
3141    int scale    = $mem$$scale;
3142    int displace = $mem$$disp;
3143    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3144    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3145    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // opcode 0xDF, register field 7: 64-bit integer store and pop into dst
3146  %}
3148
3148  // Volatile Store Long.  Must be atomic, so move it into
3149  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3150  // target address before the store (for null-ptr checks)
3151  // so the memory operand is used twice in the encoding.
3152  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3153    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // opcode 0xDF, register field 5: load the 64-bit value from the src slot onto the FPU stack
3154    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3155    emit_opcode(cbuf,0xDF);
3156    int rm_byte_opcode = 0x07;        // register field 7 with opcode 0xDF: 64-bit integer store and pop
3157    int base     = $mem$$base;
3158    int index    = $mem$$index;
3159    int scale    = $mem$$scale;
3160    int displace = $mem$$disp;
3161    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3162    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3163  %}
3165
3165  // Safepoint Poll.  This polls the safepoint page, and causes an
3166  // exception if it is not readable. Unfortunately, it kills the condition code
3167  // in the process
3168  // We currently use TESTL [spp],EDI
3169  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3170
3171  enc_class Safepoint_Poll() %{
3172    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);  // attach a poll_type relocation at the current instruction mark
3173    emit_opcode(cbuf,0x85);                                     // TEST r/m32,r32
3174    emit_rm (cbuf, 0x0, 0x7, 0x5);                              // mod=0, reg=7 (EDI), r/m=5: absolute 32-bit address operand
3175    emit_d32(cbuf, (intptr_t)os::get_polling_page());           // address of the polling page
3176  %}
3178%}
3179
3180
3181//----------FRAME--------------------------------------------------------------
3182// Definition of frame structure and management information.
3183//
3184//  S T A C K   L A Y O U T    Allocators stack-slot number
3185//                             |   (to get allocators register number
3186//  G  Owned by    |        |  v    add OptoReg::stack0())
3187//  r   CALLER     |        |
3188//  o     |        +--------+      pad to even-align allocators stack-slot
3189//  w     V        |  pad0  |        numbers; owned by CALLER
3190//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3191//  h     ^        |   in   |  5
3192//        |        |  args  |  4   Holes in incoming args owned by SELF
3193//  |     |        |        |  3
3194//  |     |        +--------+
3195//  V     |        | old out|      Empty on Intel, window on Sparc
3196//        |    old |preserve|      Must be even aligned.
3197//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3198//        |        |   in   |  3   area for Intel ret address
3199//     Owned by    |preserve|      Empty on Sparc.
3200//       SELF      +--------+
3201//        |        |  pad2  |  2   pad to align old SP
3202//        |        +--------+  1
3203//        |        | locks  |  0
3204//        |        +--------+----> OptoReg::stack0(), even aligned
3205//        |        |  pad1  | 11   pad to align new SP
3206//        |        +--------+
3207//        |        |        | 10
3208//        |        | spills |  9   spills
3209//        V        |        |  8   (pad0 slot for callee)
3210//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3211//        ^        |  out   |  7
3212//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3213//     Owned by    +--------+
3214//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3215//        |    new |preserve|      Must be even-aligned.
3216//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3217//        |        |        |
3218//
3219// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3220//         known from SELF's arguments and the Java calling convention.
3221//         Region 6-7 is determined per call site.
3222// Note 2: If the calling convention leaves holes in the incoming argument
3223//         area, those holes are owned by SELF.  Holes in the outgoing area
3224//         are owned by the CALLEE.  Holes should not be necessary in the
3225//         incoming area, as the Java calling convention is completely under
3226//         the control of the AD file.  Doubles can be sorted and packed to
3227//         avoid holes.  Holes in the outgoing arguments may be necessary for
3228//         varargs C calling conventions.
3229// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3230//         even aligned with pad0 as needed.
3231//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3232//         region 6-11 is even aligned; it may be padded out more so that
3233//         the region from SP to FP meets the minimum stack alignment.
3234
3235frame %{
3236  // What direction does stack grow in (assumed to be same for C & Java)
3237  stack_direction(TOWARDS_LOW);
3238
3239  // These three registers define part of the calling convention
3240  // between compiled code and the interpreter.
3241  inline_cache_reg(EAX);                // Inline Cache Register
3242  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3243
3244  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3245  cisc_spilling_operand_name(indOffset32);
3246
3247  // Number of stack slots consumed by locking an object
3248  sync_stack_slots(1);
3249
3250  // Compiled code's Frame Pointer
3251  frame_pointer(ESP);
3252  // Interpreter stores its frame pointer in a register which is
3253  // stored to the stack by I2CAdaptors.
3254  // I2CAdaptors convert from interpreted java to compiled java.
3255  interpreter_frame_pointer(EBP);
3256
3257  // Stack alignment requirement
3258  // Alignment size in bytes (128-bit -> 16 bytes)
3259  stack_alignment(StackAlignmentInBytes);
3260
3261  // Number of stack slots between incoming argument block and the start of
3262  // a new frame.  The PROLOG must add this many slots to the stack.  The
3263  // EPILOG must remove this many slots.  Intel needs one slot for
3264  // return address and one for EBP (EBP must be saved)
3265  in_preserve_stack_slots(2+VerifyStackAtCalls);
3266
3267  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3268  // for calls to C.  Supports the var-args backing area for register parms.
3269  varargs_C_out_slots_killed(0);
3270
3271  // The after-PROLOG location of the return address.  Location of
3272  // return address specifies a type (REG or STACK) and a number
3273  // representing the register number (i.e. - use a register name) or
3274  // stack slot.
3275  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3276  // Otherwise, it is above the locks and verification slot and alignment word
3277  return_addr(STACK - 1 +
3278              align_up((Compile::current()->in_preserve_stack_slots() +
3279                        Compile::current()->fixed_slots()),
3280                       stack_alignment_in_slots()));
3281
3282  // Body of function which returns an integer array locating
3283  // arguments either in registers or in stack slots.  Passed an array
3284  // of ideal registers called "sig" and a "length" count.  Stack-slot
3285  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3286  // arguments for a CALLEE.  Incoming stack arguments are
3287  // automatically biased by the preserve_stack_slots field above.
3288  calling_convention %{
3289    // No difference between ingoing/outgoing just pass false
3290    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3291  %}
3292
3293
3294  // Body of function which returns an integer array locating
3295  // arguments either in registers or in stack slots.  Passed an array
3296  // of ideal registers called "sig" and a "length" count.  Stack-slot
3297  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3298  // arguments for a CALLEE.  Incoming stack arguments are
3299  // automatically biased by the preserve_stack_slots field above.
3300  c_calling_convention %{
3301    // This is obviously always outgoing
3302    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3303  %}
3304
3305  // Location of C & interpreter return values: maps ideal_reg to an OptoRegPair(hi, lo)
3306  c_return_value %{
3307    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );   // the tables below are only consulted for ideal_reg in [Op_RegI, Op_RegL]
3308    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };   // low half, indexed by ideal_reg (NOTE(review): slot order presumably N, I, P, F, D, L -- confirm against the Op_Reg* enum)
3309    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };   // high half; OptoReg::Bad where the value has no second half
3310
3311    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3312    // that C functions return float and double results in XMM0.
3313    if( ideal_reg == Op_RegD && UseSSE>=2 )
3314      return OptoRegPair(XMM0b_num,XMM0_num);
3315    if( ideal_reg == Op_RegF && UseSSE>=2 )   // note: float needs UseSSE>=2 here, whereas return_value uses UseSSE>=1
3316      return OptoRegPair(OptoReg::Bad,XMM0_num);
3317
3318    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);   // every other case comes straight from the tables
3319  %}
3320
3321  // Location of return values: maps ideal_reg to an OptoRegPair(hi, lo)
3322  return_value %{
3323    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );   // the tables below are only consulted for ideal_reg in [Op_RegI, Op_RegL]
3324    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };   // low half, indexed by ideal_reg
3325    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };   // high half; OptoReg::Bad where the value has no second half
3326    if( ideal_reg == Op_RegD && UseSSE>=2 )   // doubles are returned in XMM0 once UseSSE>=2
3327      return OptoRegPair(XMM0b_num,XMM0_num);
3328    if( ideal_reg == Op_RegF && UseSSE>=1 )   // floats are returned in XMM0 already at UseSSE>=1 (c_return_value requires UseSSE>=2)
3329      return OptoRegPair(OptoReg::Bad,XMM0_num);
3330    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);   // every other case comes straight from the tables
3331  %}
3332
3333%}
3334
3335//----------ATTRIBUTES---------------------------------------------------------
3336//----------Operand Attributes-------------------------------------------------
3337op_attrib op_cost(0);        // Required cost attribute
3338
3339//----------Instruction Attributes---------------------------------------------
3340ins_attrib ins_cost(100);       // Required cost attribute
3341ins_attrib ins_size(8);         // Required size attribute (in bits)
3342ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3343                                // non-matching short branch variant of some
3344                                                            // long branch?
3345ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3346                                // specifies the alignment that some part of the instruction (not
3347                                // necessarily the start) requires.  If > 1, a compute_padding()
3348                                // function must be provided for the instruction
3349
3350//----------OPERANDS-----------------------------------------------------------
3351// Operand definitions must precede instruction definitions for correct parsing
3352// in the ADLC because operands constitute user defined types which are used in
3353// instruction definitions.
3354
3355//----------Simple Operands----------------------------------------------------
3356// Immediate Operands
3357// Integer Immediate
3358operand immI() %{
3359  match(ConI);
3360
3361  op_cost(10);
3362  format %{ %}
3363  interface(CONST_INTER);
3364%}
3365
3366// Constant for test vs zero
3367operand immI0() %{
3368  predicate(n->get_int() == 0);
3369  match(ConI);
3370
3371  op_cost(0);
3372  format %{ %}
3373  interface(CONST_INTER);
3374%}
3375
3376// Constant for increment
3377operand immI1() %{
3378  predicate(n->get_int() == 1);
3379  match(ConI);
3380
3381  op_cost(0);
3382  format %{ %}
3383  interface(CONST_INTER);
3384%}
3385
3386// Constant for decrement
3387operand immI_M1() %{
3388  predicate(n->get_int() == -1);
3389  match(ConI);
3390
3391  op_cost(0);
3392  format %{ %}
3393  interface(CONST_INTER);
3394%}
3395
3396// Valid scale values for addressing modes: an int constant in [0, 3], used as the shift count in indIndexScale and indIndexScaleOffset
3397operand immI2() %{
3398  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3399  match(ConI);
3400
3401  format %{ %}
3402  interface(CONST_INTER);
3403%}
3404
3405operand immI8() %{
3406  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));   // int constant that fits in a signed 8-bit value
3407  match(ConI);
3408
3409  op_cost(5);
3410  format %{ %}
3411  interface(CONST_INTER);
3412%}
3413
3414operand immI16() %{
3415  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));   // int constant that fits in a signed 16-bit value
3416  match(ConI);
3417
3418  op_cost(10);
3419  format %{ %}
3420  interface(CONST_INTER);
3421%}
3422
3423// Int Immediate non-negative
3424operand immU31()
3425%{
3426  predicate(n->get_int() >= 0);
3427  match(ConI);
3428
3429  op_cost(0);
3430  format %{ %}
3431  interface(CONST_INTER);
3432%}
3433
3434// Constant for long shifts
3435operand immI_32() %{
3436  predicate( n->get_int() == 32 );
3437  match(ConI);
3438
3439  op_cost(0);
3440  format %{ %}
3441  interface(CONST_INTER);
3442%}
3443
3444operand immI_1_31() %{
3445  predicate( n->get_int() >= 1 && n->get_int() <= 31 );   // int constant in [1, 31]; presumably a long-shift count below 32 (sits in the "Constant for long shifts" group) -- confirm
3446  match(ConI);
3447
3448  op_cost(0);
3449  format %{ %}
3450  interface(CONST_INTER);
3451%}
3452
3453operand immI_32_63() %{
3454  predicate( n->get_int() >= 32 && n->get_int() <= 63 );   // int constant in [32, 63]; presumably a long-shift count of at least 32 -- confirm
3455  match(ConI);
3456  op_cost(0);
3457
3458  format %{ %}
3459  interface(CONST_INTER);
3460%}
3461
3462operand immI_1() %{
3463  predicate( n->get_int() == 1 );   // int constant 1; NOTE(review): same predicate and cost as immI1
3464  match(ConI);
3465
3466  op_cost(0);
3467  format %{ %}
3468  interface(CONST_INTER);
3469%}
3470
3471operand immI_2() %{
3472  predicate( n->get_int() == 2 );
3473  match(ConI);
3474
3475  op_cost(0);
3476  format %{ %}
3477  interface(CONST_INTER);
3478%}
3479
3480operand immI_3() %{
3481  predicate( n->get_int() == 3 );
3482  match(ConI);
3483
3484  op_cost(0);
3485  format %{ %}
3486  interface(CONST_INTER);
3487%}
3488
3489// Pointer Immediate
3490operand immP() %{
3491  match(ConP);
3492
3493  op_cost(10);
3494  format %{ %}
3495  interface(CONST_INTER);
3496%}
3497
3498// NULL Pointer Immediate
3499operand immP0() %{
3500  predicate( n->get_ptr() == 0 );
3501  match(ConP);
3502  op_cost(0);
3503
3504  format %{ %}
3505  interface(CONST_INTER);
3506%}
3507
3508// Long Immediate
3509operand immL() %{
3510  match(ConL);
3511
3512  op_cost(20);
3513  format %{ %}
3514  interface(CONST_INTER);
3515%}
3516
3517// Long Immediate zero
3518operand immL0() %{
3519  predicate( n->get_long() == 0L );
3520  match(ConL);
3521  op_cost(0);
3522
3523  format %{ %}
3524  interface(CONST_INTER);
3525%}
3526
3527// Long Immediate minus one
3528operand immL_M1() %{
3529  predicate( n->get_long() == -1L );
3530  match(ConL);
3531  op_cost(0);
3532
3533  format %{ %}
3534  interface(CONST_INTER);
3535%}
3536
3537// Long immediate from 0 to 127.
3538// Used for a shorter form of long mul by 10.
3539operand immL_127() %{
3540  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3541  match(ConL);
3542  op_cost(0);
3543
3544  format %{ %}
3545  interface(CONST_INTER);
3546%}
3547
3548// Long Immediate: low 32-bit mask
3549operand immL_32bits() %{
3550  predicate(n->get_long() == 0xFFFFFFFFL);
3551  match(ConL);
3552  op_cost(0);
3553
3554  format %{ %}
3555  interface(CONST_INTER);
3556%}
3557
3558// Long Immediate whose value fits in a signed 32-bit int
3559operand immL32() %{
3560  predicate(n->get_long() == (int)(n->get_long()));   // true iff narrowing to int and widening back loses nothing
3561  match(ConL);
3562  op_cost(20);
3563
3564  format %{ %}
3565  interface(CONST_INTER);
3566%}
3567
3568// Double Immediate zero (only matched when UseSSE<=1)
3569operand immDPR0() %{
3570  // Do additional (and counter-intuitive) test against NaN to work around VC++
3571  // bug that generates code such that NaNs compare equal to 0.0
3572  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );   // note: '== 0.0' also holds for -0.0, unlike the bit-pattern test in immD0
3573  match(ConD);
3574
3575  op_cost(5);
3576  format %{ %}
3577  interface(CONST_INTER);
3578%}
3579
3580// Double Immediate one
3581operand immDPR1() %{
3582  predicate( UseSSE<=1 && n->getd() == 1.0 );
3583  match(ConD);
3584
3585  op_cost(5);
3586  format %{ %}
3587  interface(CONST_INTER);
3588%}
3589
3590// Double Immediate
3591operand immDPR() %{
3592  predicate(UseSSE<=1);
3593  match(ConD);
3594
3595  op_cost(5);
3596  format %{ %}
3597  interface(CONST_INTER);
3598%}
3599
3600operand immD() %{
3601  predicate(UseSSE>=2);   // any double constant, only when UseSSE>=2 (immDPR covers UseSSE<=1)
3602  match(ConD);
3603
3604  op_cost(5);
3605  format %{ %}
3606  interface(CONST_INTER);
3607%}
3608
3609// Double Immediate zero (only matched when UseSSE>=2)
3610operand immD0() %{
3611  // The constant is tested through jlong_cast instead of a floating-point '==', so the
3612  // VC++ problem of NaNs comparing equal to 0.0 cannot arise and -0.0 is not accepted
3613  // either.  Assumes jlong_cast yields the raw bit pattern of the double -- confirm.
3614  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3615  match(ConD);
3616
3617  format %{ %}
3618  interface(CONST_INTER);
3619%}
3620
3621// Float Immediate zero
3622operand immFPR0() %{
3623  predicate(UseSSE == 0 && n->getf() == 0.0F);
3624  match(ConF);
3625
3626  op_cost(5);
3627  format %{ %}
3628  interface(CONST_INTER);
3629%}
3630
3631// Float Immediate one
3632operand immFPR1() %{
3633  predicate(UseSSE == 0 && n->getf() == 1.0F);
3634  match(ConF);
3635
3636  op_cost(5);
3637  format %{ %}
3638  interface(CONST_INTER);
3639%}
3640
3641// Float Immediate
3642operand immFPR() %{
3643  predicate( UseSSE == 0 );
3644  match(ConF);
3645
3646  op_cost(5);
3647  format %{ %}
3648  interface(CONST_INTER);
3649%}
3650
3651// Float Immediate
3652operand immF() %{
3653  predicate(UseSSE >= 1);
3654  match(ConF);
3655
3656  op_cost(5);
3657  format %{ %}
3658  interface(CONST_INTER);
3659%}
3660
3661// Float Immediate zero.  Zero and not -0.0
3662operand immF0() %{
3663  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3664  match(ConF);
3665
3666  op_cost(5);
3667  format %{ %}
3668  interface(CONST_INTER);
3669%}
3670
3671// Immediates for special shifts (sign extend)
3672
3673// Constant 16 (member of the special-shift immediates introduced above)
3674operand immI_16() %{
3675  predicate( n->get_int() == 16 );
3676  match(ConI);
3677
3678  format %{ %}
3679  interface(CONST_INTER);
3680%}
3681
3682operand immI_24() %{
3683  predicate( n->get_int() == 24 );   // int constant 24; listed with immI_16 under "Immediates for special shifts (sign extend)"
3684  match(ConI);
3685
3686  format %{ %}
3687  interface(CONST_INTER);
3688%}
3689
3690// Constant for byte-wide masking
3691operand immI_255() %{
3692  predicate( n->get_int() == 255 );
3693  match(ConI);
3694
3695  format %{ %}
3696  interface(CONST_INTER);
3697%}
3698
3699// Constant for short-wide masking
3700operand immI_65535() %{
3701  predicate(n->get_int() == 65535);
3702  match(ConI);
3703
3704  format %{ %}
3705  interface(CONST_INTER);
3706%}
3707
3708// Register Operands
3709// Integer Register
3710operand rRegI() %{
3711  constraint(ALLOC_IN_RC(int_reg));
3712  match(RegI);
3713  match(xRegI);
3714  match(eAXRegI);
3715  match(eBXRegI);
3716  match(eCXRegI);
3717  match(eDXRegI);
3718  match(eDIRegI);
3719  match(eSIRegI);
3720
3721  format %{ %}
3722  interface(REG_INTER);
3723%}
3724
3725// Subset of Integer Register
3726operand xRegI(rRegI reg) %{
3727  constraint(ALLOC_IN_RC(int_x_reg));
3728  match(reg);
3729  match(eAXRegI);
3730  match(eBXRegI);
3731  match(eCXRegI);
3732  match(eDXRegI);
3733
3734  format %{ %}
3735  interface(REG_INTER);
3736%}
3737
3738// Special Registers
3739operand eAXRegI(xRegI reg) %{
3740  constraint(ALLOC_IN_RC(eax_reg));
3741  match(reg);
3742  match(rRegI);
3743
3744  format %{ "EAX" %}
3745  interface(REG_INTER);
3746%}
3747
3748// Special Registers
3749operand eBXRegI(xRegI reg) %{
3750  constraint(ALLOC_IN_RC(ebx_reg));
3751  match(reg);
3752  match(rRegI);
3753
3754  format %{ "EBX" %}
3755  interface(REG_INTER);
3756%}
3757
3758operand eCXRegI(xRegI reg) %{
3759  constraint(ALLOC_IN_RC(ecx_reg));
3760  match(reg);
3761  match(rRegI);
3762
3763  format %{ "ECX" %}
3764  interface(REG_INTER);
3765%}
3766
3767operand eDXRegI(xRegI reg) %{
3768  constraint(ALLOC_IN_RC(edx_reg));
3769  match(reg);
3770  match(rRegI);
3771
3772  format %{ "EDX" %}
3773  interface(REG_INTER);
3774%}
3775
3776operand eDIRegI(xRegI reg) %{
3777  constraint(ALLOC_IN_RC(edi_reg));
3778  match(reg);
3779  match(rRegI);
3780
3781  format %{ "EDI" %}
3782  interface(REG_INTER);
3783%}
3784
3785operand naxRegI() %{
3786  constraint(ALLOC_IN_RC(nax_reg));
3787  match(RegI);
3788  match(eCXRegI);
3789  match(eDXRegI);
3790  match(eSIRegI);
3791  match(eDIRegI);
3792
3793  format %{ %}
3794  interface(REG_INTER);
3795%}
3796
3797operand nadxRegI() %{
3798  constraint(ALLOC_IN_RC(nadx_reg));
3799  match(RegI);
3800  match(eBXRegI);
3801  match(eCXRegI);
3802  match(eSIRegI);
3803  match(eDIRegI);
3804
3805  format %{ %}
3806  interface(REG_INTER);
3807%}
3808
3809operand ncxRegI() %{
3810  constraint(ALLOC_IN_RC(ncx_reg));
3811  match(RegI);
3812  match(eAXRegI);
3813  match(eDXRegI);
3814  match(eSIRegI);
3815  match(eDIRegI);
3816
3817  format %{ %}
3818  interface(REG_INTER);
3819%}
3820
3821// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3822// //
3823operand eSIRegI(xRegI reg) %{
3824   constraint(ALLOC_IN_RC(esi_reg));
3825   match(reg);
3826   match(rRegI);
3827
3828   format %{ "ESI" %}
3829   interface(REG_INTER);
3830%}
3831
3832// Pointer Register
3833operand anyRegP() %{
3834  constraint(ALLOC_IN_RC(any_reg));
3835  match(RegP);
3836  match(eAXRegP);
3837  match(eBXRegP);
3838  match(eCXRegP);
3839  match(eDIRegP);
3840  match(eRegP);
3841
3842  format %{ %}
3843  interface(REG_INTER);
3844%}
3845
3846operand eRegP() %{
3847  constraint(ALLOC_IN_RC(int_reg));
3848  match(RegP);
3849  match(eAXRegP);
3850  match(eBXRegP);
3851  match(eCXRegP);
3852  match(eDIRegP);
3853
3854  format %{ %}
3855  interface(REG_INTER);
3856%}
3857
3858// On windows95, EBP is not safe to use for implicit null tests.
3859operand eRegP_no_EBP() %{
3860  constraint(ALLOC_IN_RC(int_reg_no_ebp));   // pointer register drawn from the int_reg_no_ebp class
3861  match(RegP);
3862  match(eAXRegP);
3863  match(eBXRegP);
3864  match(eCXRegP);
3865  match(eDIRegP);
3866
3867  op_cost(100);   // eRegP declares no cost; presumably this steers the matcher toward eRegP when either fits -- confirm
3868  format %{ %}
3869  interface(REG_INTER);
3870%}
3871
3872operand naxRegP() %{
3873  constraint(ALLOC_IN_RC(nax_reg));
3874  match(RegP);
3875  match(eBXRegP);
3876  match(eDXRegP);
3877  match(eCXRegP);
3878  match(eSIRegP);
3879  match(eDIRegP);
3880
3881  format %{ %}
3882  interface(REG_INTER);
3883%}
3884
3885operand nabxRegP() %{
3886  constraint(ALLOC_IN_RC(nabx_reg));
3887  match(RegP);
3888  match(eCXRegP);
3889  match(eDXRegP);
3890  match(eSIRegP);
3891  match(eDIRegP);
3892
3893  format %{ %}
3894  interface(REG_INTER);
3895%}
3896
3897operand pRegP() %{
3898  constraint(ALLOC_IN_RC(p_reg));
3899  match(RegP);
3900  match(eBXRegP);
3901  match(eDXRegP);
3902  match(eSIRegP);
3903  match(eDIRegP);
3904
3905  format %{ %}
3906  interface(REG_INTER);
3907%}
3908
3909// Special Registers
3910// Return a pointer value
3911operand eAXRegP(eRegP reg) %{
3912  constraint(ALLOC_IN_RC(eax_reg));
3913  match(reg);
3914  format %{ "EAX" %}
3915  interface(REG_INTER);
3916%}
3917
3918// Used in AtomicAdd
3919operand eBXRegP(eRegP reg) %{
3920  constraint(ALLOC_IN_RC(ebx_reg));
3921  match(reg);
3922  format %{ "EBX" %}
3923  interface(REG_INTER);
3924%}
3925
3926// Tail-call (interprocedural jump) to interpreter
3927operand eCXRegP(eRegP reg) %{
3928  constraint(ALLOC_IN_RC(ecx_reg));
3929  match(reg);
3930  format %{ "ECX" %}
3931  interface(REG_INTER);
3932%}
3933
3934operand eSIRegP(eRegP reg) %{
3935  constraint(ALLOC_IN_RC(esi_reg));
3936  match(reg);
3937  format %{ "ESI" %}
3938  interface(REG_INTER);
3939%}
3940
3941// Used in rep stosw
3942operand eDIRegP(eRegP reg) %{
3943  constraint(ALLOC_IN_RC(edi_reg));
3944  match(reg);
3945  format %{ "EDI" %}
3946  interface(REG_INTER);
3947%}
3948
3949operand eRegL() %{
3950  constraint(ALLOC_IN_RC(long_reg));
3951  match(RegL);
3952  match(eADXRegL);
3953
3954  format %{ %}
3955  interface(REG_INTER);
3956%}
3957
3958operand eADXRegL( eRegL reg ) %{
3959  constraint(ALLOC_IN_RC(eadx_reg));
3960  match(reg);
3961
3962  format %{ "EDX:EAX" %}
3963  interface(REG_INTER);
3964%}
3965
3966operand eBCXRegL( eRegL reg ) %{
3967  constraint(ALLOC_IN_RC(ebcx_reg));
3968  match(reg);
3969
3970  format %{ "EBX:ECX" %}
3971  interface(REG_INTER);
3972%}
3973
3974// Special case for integer high multiply
3975operand eADXRegL_low_only() %{
3976  constraint(ALLOC_IN_RC(eadx_reg));   // same register class as eADXRegL (the EDX:EAX pair)
3977  match(RegL);
3978
3979  format %{ "EAX" %}
3980  interface(REG_INTER);   // only the printed name differs from eADXRegL: it shows just "EAX"
3981%}
3982
3983// Flags register, used as output of compare instructions
3984operand eFlagsReg() %{
3985  constraint(ALLOC_IN_RC(int_flags));
3986  match(RegFlags);
3987
3988  format %{ "EFLAGS" %}
3989  interface(REG_INTER);
3990%}
3991
3992// Flags register, used as output of FLOATING POINT compare instructions
3993operand eFlagsRegU() %{
3994  constraint(ALLOC_IN_RC(int_flags));
3995  match(RegFlags);
3996
3997  format %{ "EFLAGS_U" %}
3998  interface(REG_INTER);
3999%}
4000
4001operand eFlagsRegUCF() %{
4002  constraint(ALLOC_IN_RC(int_flags));
4003  match(RegFlags);
4004  predicate(false);   // always false, so the match rule above can never select this operand; NOTE(review): presumably it is only named explicitly by instruction rules -- confirm
4005
4006  format %{ "EFLAGS_U_CF" %}
4007  interface(REG_INTER);
4008%}
4009
4010// Condition Code Register used by long compare
4011operand flagsReg_long_LTGE() %{
4012  constraint(ALLOC_IN_RC(int_flags));
4013  match(RegFlags);
4014  format %{ "FLAGS_LTGE" %}
4015  interface(REG_INTER);
4016%}
4017operand flagsReg_long_EQNE() %{
4018  constraint(ALLOC_IN_RC(int_flags));
4019  match(RegFlags);
4020  format %{ "FLAGS_EQNE" %}
4021  interface(REG_INTER);
4022%}
4023operand flagsReg_long_LEGT() %{
4024  constraint(ALLOC_IN_RC(int_flags));
4025  match(RegFlags);
4026  format %{ "FLAGS_LEGT" %}
4027  interface(REG_INTER);
4028%}
4029
4030// Condition Code Register used by unsigned long compare
4031operand flagsReg_ulong_LTGE() %{
4032  constraint(ALLOC_IN_RC(int_flags));
4033  match(RegFlags);
4034  format %{ "FLAGS_U_LTGE" %}
4035  interface(REG_INTER);
4036%}
4037operand flagsReg_ulong_EQNE() %{
4038  constraint(ALLOC_IN_RC(int_flags));
4039  match(RegFlags);
4040  format %{ "FLAGS_U_EQNE" %}
4041  interface(REG_INTER);
4042%}
4043operand flagsReg_ulong_LEGT() %{
4044  constraint(ALLOC_IN_RC(int_flags));
4045  match(RegFlags);
4046  format %{ "FLAGS_U_LEGT" %}
4047  interface(REG_INTER);
4048%}
4049
4050// Double register operands in the FPR (non-XMM) register class, used when UseSSE < 2
4051operand regDPR() %{
4052  predicate( UseSSE < 2 );
4053  constraint(ALLOC_IN_RC(fp_dbl_reg));
4054  match(RegD);
4055  match(regDPR1);   // the operands pinned to a specific register ("FPR1", "FPR2") are accepted as well
4056  match(regDPR2);
4057  format %{ %}
4058  interface(REG_INTER);
4059%}
4060
4061operand regDPR1(regDPR reg) %{
4062  predicate( UseSSE < 2 );
4063  constraint(ALLOC_IN_RC(fp_dbl_reg0));
4064  match(reg);
4065  format %{ "FPR1" %}
4066  interface(REG_INTER);
4067%}
4068
4069operand regDPR2(regDPR reg) %{
4070  predicate( UseSSE < 2 );
4071  constraint(ALLOC_IN_RC(fp_dbl_reg1));
4072  match(reg);
4073  format %{ "FPR2" %}
4074  interface(REG_INTER);
4075%}
4076
4077operand regnotDPR1(regDPR reg) %{
4078  predicate( UseSSE < 2 );
4079  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4080  match(reg);
4081  format %{ %}
4082  interface(REG_INTER);
4083%}
4084
4085// Float register operands
4086operand regFPR() %{
4087  predicate( UseSSE < 2 );
4088  constraint(ALLOC_IN_RC(fp_flt_reg));
4089  match(RegF);
4090  match(regFPR1);
4091  format %{ %}
4092  interface(REG_INTER);
4093%}
4094
4095// Float register operands
4096operand regFPR1(regFPR reg) %{
4097  predicate( UseSSE < 2 );
4098  constraint(ALLOC_IN_RC(fp_flt_reg0));
4099  match(reg);
4100  format %{ "FPR1" %}
4101  interface(REG_INTER);
4102%}
4103
4104// XMM Float register operands
4105operand regF() %{
4106  predicate( UseSSE>=1 );
4107  constraint(ALLOC_IN_RC(float_reg_legacy));
4108  match(RegF);
4109  format %{ %}
4110  interface(REG_INTER);
4111%}
4112
4113// XMM Double register operands
4114operand regD() %{
4115  predicate( UseSSE>=2 );
4116  constraint(ALLOC_IN_RC(double_reg_legacy));
4117  match(RegD);
4118  format %{ %}
4119  interface(REG_INTER);
4120%}
4121
4122// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4123// runtime code generation via reg_class_dynamic.
4124operand vecS() %{
4125  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4126  match(VecS);
4127
4128  format %{ %}
4129  interface(REG_INTER);
4130%}
4131
4132operand vecD() %{
4133  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4134  match(VecD);
4135
4136  format %{ %}
4137  interface(REG_INTER);
4138%}
4139
4140operand vecX() %{
4141  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4142  match(VecX);
4143
4144  format %{ %}
4145  interface(REG_INTER);
4146%}
4147
4148operand vecY() %{
4149  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4150  match(VecY);
4151
4152  format %{ %}
4153  interface(REG_INTER);
4154%}
4155
4156//----------Memory Operands----------------------------------------------------
4157// Direct Memory Operand: the address is a pointer constant, no registers involved
4158operand direct(immP addr) %{
4159  match(addr);
4160
4161  format %{ "[$addr]" %}
4162  interface(MEMORY_INTER) %{
4163    base(0xFFFFFFFF);  // presumably the "no base register" marker -- confirm against the encoder
4164    index(0x4);        // No Index
4165    scale(0x0);        // No Scale
4166    disp($addr);       // the constant itself is the displacement
4167  %}
4168%}
4169
4170// Indirect Memory Operand: address held in a single pointer register
4171operand indirect(eRegP reg) %{
4172  constraint(ALLOC_IN_RC(int_reg));
4173  match(reg);
4174
4175  format %{ "[$reg]" %}
4176  interface(MEMORY_INTER) %{
4177    base($reg);
4178    index(0x4);   // No Index
4179    scale(0x0);   // No Scale
4180    disp(0x0);    // No Displacement
4181  %}
4182%}
4183
4184// Indirect Memory Plus Short Offset Operand
4185operand indOffset8(eRegP reg, immI8 off) %{
4186  match(AddP reg off);
4187
4188  format %{ "[$reg + $off]" %}
4189  interface(MEMORY_INTER) %{
4190    base($reg);
4191    index(0x4);
4192    scale(0x0);
4193    disp($off);
4194  %}
4195%}
4196
4197// Indirect Memory Plus Long Offset Operand
4198operand indOffset32(eRegP reg, immI off) %{
4199  match(AddP reg off);
4200
4201  format %{ "[$reg + $off]" %}
4202  interface(MEMORY_INTER) %{
4203    base($reg);
4204    index(0x4);
4205    scale(0x0);
4206    disp($off);
4207  %}
4208%}
4209
4210// Indirect Memory Plus Long Offset Operand, with an int register plus a pointer constant
4211operand indOffset32X(rRegI reg, immP off) %{
4212  match(AddP off reg);   // operand order is reversed relative to indOffset32: the pointer constant comes first
4213
4214  format %{ "[$reg + $off]" %}
4215  interface(MEMORY_INTER) %{
4216    base($reg);   // the int register is encoded as the base
4217    index(0x4);   // No Index
4218    scale(0x0);   // No Scale
4219    disp($off);   // the pointer constant is encoded as the displacement
4220  %}
4221%}
4222
4223// Indirect Memory Plus Index Register Plus Offset Operand (index is not scaled)
4224operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4225  match(AddP (AddP reg ireg) off);
4226
4227  op_cost(10);
4228  format %{"[$reg + $off + $ireg]" %}
4229  interface(MEMORY_INTER) %{
4230    base($reg);
4231    index($ireg);
4232    scale(0x0);   // No Scale
4233    disp($off);
4234  %}
4235%}
4236
4237// Indirect Memory Plus Index Register Operand (no scale, no offset)
4238operand indIndex(eRegP reg, rRegI ireg) %{
4239  match(AddP reg ireg);
4240
4241  op_cost(10);
4242  format %{"[$reg + $ireg]" %}
4243  interface(MEMORY_INTER) %{
4244    base($reg);
4245    index($ireg);
4246    scale(0x0);   // No Scale
4247    disp(0x0);    // No Displacement
4248  %}
4249%}
4250
4251// // -------------------------------------------------------------------------
4252// // 486 architecture doesn't support "scale * index + offset" with out a base
4253// // -------------------------------------------------------------------------
4254// // Scaled Memory Operands
4255// // Indirect Memory Times Scale Plus Offset Operand
4256// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4257//   match(AddP off (LShiftI ireg scale));
4258//
4259//   op_cost(10);
4260//   format %{"[$off + $ireg << $scale]" %}
4261//   interface(MEMORY_INTER) %{
4262//     base(0x4);
4263//     index($ireg);
4264//     scale($scale);
4265//     disp($off);
4266//   %}
4267// %}
4268
4269// Indirect Memory Plus Scaled Index Register Operand
4270operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4271  match(AddP reg (LShiftI ireg scale));   // the scale is the left-shift count applied to the index; immI2 limits it to 0..3
4272
4273  op_cost(10);
4274  format %{"[$reg + $ireg << $scale]" %}
4275  interface(MEMORY_INTER) %{
4276    base($reg);
4277    index($ireg);
4278    scale($scale);
4279    disp(0x0);    // No Displacement
4280  %}
4281%}
4282
4283// Indirect Memory Plus Scaled Index Register Plus Offset Operand
4284operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4285  match(AddP (AddP reg (LShiftI ireg scale)) off);   // the scale is the left-shift count applied to the index; immI2 limits it to 0..3
4286
4287  op_cost(10);
4288  format %{"[$reg + $off + $ireg << $scale]" %}
4289  interface(MEMORY_INTER) %{
4290    base($reg);
4291    index($ireg);
4292    scale($scale);
4293    disp($off);
4294  %}
4295%}
4296
4297//----------Load Long Memory Operands------------------------------------------
4298// The load-long idiom will use its address expression again after loading
4299// the first word of the long.  If the load-long destination overlaps with
4300// registers used in the addressing expression, the 2nd half will be loaded
4301// from a clobbered address.  Fix this by requiring that load-long use
4302// address registers that do not overlap with the load-long target.
4303
4304// load-long support: pointer register for load-long address expressions
4305operand load_long_RegP() %{
4306  constraint(ALLOC_IN_RC(esi_reg));   // pinned to the esi_reg class; presumably ESI is outside every long register pair, giving the non-overlap required above -- confirm
4307  match(RegP);
4308  match(eSIRegP);
4309  op_cost(100);
4310  format %{  %}
4311  interface(REG_INTER);
4312%}
4313
4314// Indirect Memory Operand Long: like indirect, but the base is a load_long_RegP
4315operand load_long_indirect(load_long_RegP reg) %{
4316  constraint(ALLOC_IN_RC(esi_reg));
4317  match(reg);
4318
4319  format %{ "[$reg]" %}
4320  interface(MEMORY_INTER) %{
4321    base($reg);
4322    index(0x4);   // No Index
4323    scale(0x0);   // No Scale
4324    disp(0x0);    // No Displacement
4325  %}
4326%}
4327
4328// Indirect Memory Plus Long Offset Operand: like indOffset32, but the base is a load_long_RegP
4329operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4330  match(AddP reg off);
4331
4332  format %{ "[$reg + $off]" %}
4333  interface(MEMORY_INTER) %{
4334    base($reg);
4335    index(0x4);   // No Index
4336    scale(0x0);   // No Scale
4337    disp($off);
4338  %}
4339%}
4340
4341opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4342
4343
4344//----------Special Memory Operands--------------------------------------------
4345// Stack Slot Operand - This operand is used for loading and storing temporary
4346//                      values on the stack where a match requires a value to
4347//                      flow through memory.
// Stack-slot memory operand for an sRegP value: addressed as ESP plus the
// slot's stack offset, with no index register and no scaling.
4348operand stackSlotP(sRegP reg) %{
4349  constraint(ALLOC_IN_RC(stack_slots));
4350  // No match rule because this operand is only generated in matching
4351  format %{ "[$reg]" %}
4352  interface(MEMORY_INTER) %{
4353    base(0x4);   // ESP
4354    index(0x4);  // No Index
4355    scale(0x0);  // No Scale
4356    disp($reg);  // Stack Offset
4357  %}
4358%}
4359
// Stack-slot memory operand for an sRegI value: addressed as ESP plus the
// slot's stack offset, with no index register and no scaling.
4360operand stackSlotI(sRegI reg) %{
4361  constraint(ALLOC_IN_RC(stack_slots));
4362  // No match rule because this operand is only generated in matching
4363  format %{ "[$reg]" %}
4364  interface(MEMORY_INTER) %{
4365    base(0x4);   // ESP
4366    index(0x4);  // No Index
4367    scale(0x0);  // No Scale
4368    disp($reg);  // Stack Offset
4369  %}
4370%}
4371
// Stack-slot memory operand for an sRegF value: addressed as ESP plus the
// slot's stack offset, with no index register and no scaling.
4372operand stackSlotF(sRegF reg) %{
4373  constraint(ALLOC_IN_RC(stack_slots));
4374  // No match rule because this operand is only generated in matching
4375  format %{ "[$reg]" %}
4376  interface(MEMORY_INTER) %{
4377    base(0x4);   // ESP
4378    index(0x4);  // No Index
4379    scale(0x0);  // No Scale
4380    disp($reg);  // Stack Offset
4381  %}
4382%}
4383
// Stack-slot memory operand for an sRegD value: addressed as ESP plus the
// slot's stack offset, with no index register and no scaling.
4384operand stackSlotD(sRegD reg) %{
4385  constraint(ALLOC_IN_RC(stack_slots));
4386  // No match rule because this operand is only generated in matching
4387  format %{ "[$reg]" %}
4388  interface(MEMORY_INTER) %{
4389    base(0x4);   // ESP
4390    index(0x4);  // No Index
4391    scale(0x0);  // No Scale
4392    disp($reg);  // Stack Offset
4393  %}
4394%}
4395
// Stack-slot memory operand for an sRegL value: addressed as ESP plus the
// slot's stack offset, with no index register and no scaling.
4396operand stackSlotL(sRegL reg) %{
4397  constraint(ALLOC_IN_RC(stack_slots));
4398  // No match rule because this operand is only generated in matching
4399  format %{ "[$reg]" %}
4400  interface(MEMORY_INTER) %{
4401    base(0x4);   // ESP
4402    index(0x4);  // No Index
4403    scale(0x0);  // No Scale
4404    disp($reg);  // Stack Offset
4405  %}
4406%}
4407
4408//----------Memory Operands - Win95 Implicit Null Variants----------------
4409// Indirect Memory Operand
// "[reg]" addressing whose base register is restricted to eRegP_no_EBP and
// allocated from int_reg.  Operand cost is 100; the interface encodes
// index 0x4, scale 0 and displacement 0.
4410operand indirect_win95_safe(eRegP_no_EBP reg)
4411%{
4412  constraint(ALLOC_IN_RC(int_reg));
4413  match(reg);
4414
4415  op_cost(100);
4416  format %{ "[$reg]" %}
4417  interface(MEMORY_INTER) %{
4418    base($reg);
4419    index(0x4);
4420    scale(0x0);
4421    disp(0x0);
4422  %}
4423%}
4424
4425// Indirect Memory Plus Short Offset Operand
// "[reg + off]" addressing with an immI8 offset and a base register restricted
// to eRegP_no_EBP.  Operand cost is 100; the offset becomes the displacement
// and index/scale are 0x4/0.
4426operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4427%{
4428  match(AddP reg off);
4429
4430  op_cost(100);
4431  format %{ "[$reg + $off]" %}
4432  interface(MEMORY_INTER) %{
4433    base($reg);
4434    index(0x4);
4435    scale(0x0);
4436    disp($off);
4437  %}
4438%}
4439
4440// Indirect Memory Plus Long Offset Operand
// "[reg + off]" addressing with an immI offset and a base register restricted
// to eRegP_no_EBP.  Operand cost is 100; the offset becomes the displacement
// and index/scale are 0x4/0.
4441operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4442%{
4443  match(AddP reg off);
4444
4445  op_cost(100);
4446  format %{ "[$reg + $off]" %}
4447  interface(MEMORY_INTER) %{
4448    base($reg);
4449    index(0x4);
4450    scale(0x0);
4451    disp($off);
4452  %}
4453%}
4454
4455// Indirect Memory Plus Index Register Plus Offset Operand
// "[reg + off + ireg]" addressing: base restricted to eRegP_no_EBP, an rRegI
// index register used with scale 0, and an immI displacement.  Operand cost
// is 100.
4456operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4457%{
4458  match(AddP (AddP reg ireg) off);
4459
4460  op_cost(100);
4461  format %{"[$reg + $off + $ireg]" %}
4462  interface(MEMORY_INTER) %{
4463    base($reg);
4464    index($ireg);
4465    scale(0x0);
4466    disp($off);
4467  %}
4468%}
4469
4470// Indirect Memory Times Scale Plus Index Register
// "[reg + ireg << scale]" addressing: base restricted to eRegP_no_EBP, an
// rRegI index shifted left by an immI2 scale, and no displacement.  Operand
// cost is 100.
4471operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4472%{
4473  match(AddP reg (LShiftI ireg scale));
4474
4475  op_cost(100);
4476  format %{"[$reg + $ireg << $scale]" %}
4477  interface(MEMORY_INTER) %{
4478    base($reg);
4479    index($ireg);
4480    scale($scale);
4481    disp(0x0);
4482  %}
4483%}
4484
4485// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
// "[reg + off + ireg << scale]" addressing: base restricted to eRegP_no_EBP,
// an rRegI index shifted left by an immI2 scale, plus an immI displacement.
// Operand cost is 100.
4486operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4487%{
4488  match(AddP (AddP reg (LShiftI ireg scale)) off);
4489
4490  op_cost(100);
4491  format %{"[$reg + $off + $ireg << $scale]" %}
4492  interface(MEMORY_INTER) %{
4493    base($reg);
4494    index($ireg);
4495    scale($scale);
4496    disp($off);
4497  %}
4498%}
4499
4500//----------Conditional Branch Operands----------------------------------------
4501// Comparison Op  - This is the operation of the comparison, and is limited to
4502//                  the following set of codes:
4503//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4504//
4505// Other attributes of the comparison, such as unsignedness, are specified
4506// by the comparison instruction that sets a condition code flags register.
4507// That result is represented by a flags operand whose subtype is appropriate
4508// to the unsignedness (etc.) of the comparison.
4509//
4510// Later, the instruction which matches both the Comparison Op (a Bool) and
4511// the flags (produced by the Cmp) specifies the coding of the comparison op
4512// by matching a specific subtype of Bool operand below, such as cmpOpU.
4513
4514// Comparison Code
// Matches any Bool.  Each entry below pairs a condition-code encoding with
// the mnemonic suffix used when printing; the ordering tests use the signed
// mnemonics (l, ge, le, g).
4515operand cmpOp() %{
4516  match(Bool);
4517
4518  format %{ "" %}
4519  interface(COND_INTER) %{
4520    equal(0x4, "e");
4521    not_equal(0x5, "ne");
4522    less(0xC, "l");
4523    greater_equal(0xD, "ge");
4524    less_equal(0xE, "le");
4525    greater(0xF, "g");
4526    overflow(0x0, "o");
4527    no_overflow(0x1, "no");
4528  %}
4529%}
4530
4531// Comparison Code, unsigned compare.  Used by FP also, with
4532// C2 (unordered) turned into GT or LT already.  The other bits
4533// C0 and C3 are turned into Carry & Zero flags.
// Matches any Bool.  Same layout as cmpOp, but the ordering tests use the
// unsigned mnemonics (b, nb, be, nbe) and their encodings.
4534operand cmpOpU() %{
4535  match(Bool);
4536
4537  format %{ "" %}
4538  interface(COND_INTER) %{
4539    equal(0x4, "e");
4540    not_equal(0x5, "ne");
4541    less(0x2, "b");
4542    greater_equal(0x3, "nb");
4543    less_equal(0x6, "be");
4544    greater(0x7, "nbe");
4545    overflow(0x0, "o");
4546    no_overflow(0x1, "no");
4547  %}
4548%}
4549
4550// Floating comparisons that don't require any fixup for the unordered case
// Matches a Bool only when its test is lt, ge, le or gt (see predicate).
// The condition table is identical to cmpOpU's (unsigned mnemonics).
4551operand cmpOpUCF() %{
4552  match(Bool);
4553  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4554            n->as_Bool()->_test._test == BoolTest::ge ||
4555            n->as_Bool()->_test._test == BoolTest::le ||
4556            n->as_Bool()->_test._test == BoolTest::gt);
4557  format %{ "" %}
4558  interface(COND_INTER) %{
4559    equal(0x4, "e");
4560    not_equal(0x5, "ne");
4561    less(0x2, "b");
4562    greater_equal(0x3, "nb");
4563    less_equal(0x6, "be");
4564    greater(0x7, "nbe");
4565    overflow(0x0, "o");
4566    no_overflow(0x1, "no");
4567  %}
4568%}
4569
4570
4571// Floating comparisons that can be fixed up with extra conditional jumps
// Matches a Bool only when its test is ne or eq (see predicate), i.e. the
// tests not accepted by cmpOpUCF.  The condition table is identical to
// cmpOpU's (unsigned mnemonics).
4572operand cmpOpUCF2() %{
4573  match(Bool);
4574  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4575            n->as_Bool()->_test._test == BoolTest::eq);
4576  format %{ "" %}
4577  interface(COND_INTER) %{
4578    equal(0x4, "e");
4579    not_equal(0x5, "ne");
4580    less(0x2, "b");
4581    greater_equal(0x3, "nb");
4582    less_equal(0x6, "be");
4583    greater(0x7, "nbe");
4584    overflow(0x0, "o");
4585    no_overflow(0x1, "no");
4586  %}
4587%}
4588
4589// Comparison Code for FP conditional move
// Matches a Bool whose test is neither overflow nor no_overflow (see
// predicate).  The six ordinary conditions carry three-hex-digit values and
// no mnemonic string.
// NOTE(review): the values look like FCMOVcc opcode selectors (low byte
// C8/C0/D0, high bit choosing the opcode group) — confirm against the
// encode class that consumes them.
4590operand cmpOp_fcmov() %{
4591  match(Bool);
4592
4593  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4594            n->as_Bool()->_test._test != BoolTest::no_overflow);
4595  format %{ "" %}
4596  interface(COND_INTER) %{
4597    equal        (0x0C8);
4598    not_equal    (0x1C8);
4599    less         (0x0C0);
4600    greater_equal(0x1C0);
4601    less_equal   (0x0D0);
4602    greater      (0x1D0);
4603    overflow(0x0, "o"); // not really supported by the instruction
4604    no_overflow(0x1, "no"); // not really supported by the instruction
4605  %}
4606%}
4607
4608// Comparison Code used in long compares
// Matches any Bool.  Relative to cmpOp the ordering entries are mirrored:
// less uses the "g" encoding, greater_equal "le", less_equal "ge" and greater
// "l" — the conditions that apply when the compared operands are swapped.
// equal, not_equal and the overflow entries are unchanged.
4609operand cmpOp_commute() %{
4610  match(Bool);
4611
4612  format %{ "" %}
4613  interface(COND_INTER) %{
4614    equal(0x4, "e");
4615    not_equal(0x5, "ne");
4616    less(0xF, "g");
4617    greater_equal(0xE, "le");
4618    less_equal(0xD, "ge");
4619    greater(0xC, "l");
4620    overflow(0x0, "o");
4621    no_overflow(0x1, "no");
4622  %}
4623%}
4624
4625// Comparison Code used in unsigned long compares
// Matches any Bool.  Relative to cmpOpU the ordering entries are mirrored:
// less uses the "nbe" encoding, greater_equal "be", less_equal "nb" and
// greater "b" — the unsigned conditions that apply when the compared operands
// are swapped.  equal, not_equal and the overflow entries are unchanged.
4626operand cmpOpU_commute() %{
4627  match(Bool);
4628
4629  format %{ "" %}
4630  interface(COND_INTER) %{
4631    equal(0x4, "e");
4632    not_equal(0x5, "ne");
4633    less(0x7, "nbe");
4634    greater_equal(0x6, "be");
4635    less_equal(0x3, "nb");
4636    greater(0x2, "b");
4637    overflow(0x0, "o");
4638    no_overflow(0x1, "no");
4639  %}
4640%}
4641
4642//----------OPERAND CLASSES----------------------------------------------------
4643// Operand Classes are groups of operands that are used to simplify
4644// instruction definitions by not requiring the AD writer to specify separate
4645// instructions for every form of operand when the instruction accepts
4646// multiple operand types with the same basic encoding and format.  The classic
4647// case of this is memory operands.
4648
// General memory operand class: any of the listed addressing forms may be
// supplied wherever an instruction or pipeline class takes a "memory" operand.
4649opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4650               indIndex, indIndexScale, indIndexScaleOffset);
4651
4652// Long memory operations are encoded in 2 instructions and a +4 offset.
4653// This means some kind of offset is always required and you cannot use
4654// an oop as the offset (done when working on static globals).
// Same members as the "memory" class except that indOffset32X is omitted.
4655opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4656                    indIndex, indIndexScale, indIndexScaleOffset);
4657
4658
4659//----------PIPELINE-----------------------------------------------------------
4660// Rules which define the behavior of the target architecture's pipeline.
4661pipeline %{
4662
4663//----------ATTRIBUTES---------------------------------------------------------
4664attributes %{
4665  variable_size_instructions;        // Instructions vary in size
4666  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4667  instruction_unit_size = 1;         // Instruction sizes are measured in 1-byte units
4668  instruction_fetch_unit_size = 16;  // The processor fetches one line
4669  instruction_fetch_units = 1;       // of 16 bytes
4670
4671  // List of nop instructions
4672  nops( MachNop );
4673%}
4674
4675//----------RESOURCES----------------------------------------------------------
4676// Resources are the functional units available to the machine
4677
4678// Generic P2/P3 pipeline
4679// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4680// 3 instructions decoded per cycle.
4681// 2 load/store ops per cycle, 1 branch, 1 FPU,
4682// 2 ALU op, only ALU0 handles mul/div instructions.
// DECODE, MEM and ALU are masks meaning "any one of" the units OR-ed together.
4683resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4684           MS0, MS1, MEM = MS0 | MS1,
4685           BR, FPU,
4686           ALU0, ALU1, ALU = ALU0 | ALU1 );
4687
4688//----------PIPELINE DESCRIPTION-----------------------------------------------
4689// Pipeline Description specifies the stages in the machine's pipeline
4690
4691// Generic P2/P3 pipeline
// Six stages, S0..S5; the pipeline classes below place operand reads/writes
// and resource usage in these stages.
4692pipe_desc(S0, S1, S2, S3, S4, S5);
4693
4694//----------PIPELINE CLASSES---------------------------------------------------
4695// Pipeline Classes describe the stages in which input and output are
4696// referenced by the hardware pipeline.
4697
4698// Naming convention: ialu or fpu
4699// Then: _reg
4700// Then: _reg if there is a 2nd register
4701// Then: _long if it's a pair of instructions implementing a long
4702// Then: _fat if it requires the big decoder
4703//   Or: _mem if it requires the big decoder and a memory unit.
4704
4705// Integer ALU reg operation
4706pipe_class ialu_reg(rRegI dst) %{
4707    single_instruction;
4708    dst    : S4(write);
4709    dst    : S3(read);
4710    DECODE : S0;        // any decoder
4711    ALU    : S3;        // any alu
4712%}
4713
4714// Long ALU reg operation
4715pipe_class ialu_reg_long(eRegL dst) %{
4716    instruction_count(2);
4717    dst    : S4(write);
4718    dst    : S3(read);
4719    DECODE : S0(2);     // any 2 decoders
4720    ALU    : S3(2);     // both alus
4721%}
4722
4723// Integer ALU reg operation using big decoder
4724pipe_class ialu_reg_fat(rRegI dst) %{
4725    single_instruction;
4726    dst    : S4(write);
4727    dst    : S3(read);
4728    D0     : S0;        // big decoder only
4729    ALU    : S3;        // any alu
4730%}
4731
4732// Long ALU reg operation using big decoder
4733pipe_class ialu_reg_long_fat(eRegL dst) %{
4734    instruction_count(2);
4735    dst    : S4(write);
4736    dst    : S3(read);
4737    D0     : S0(2);     // big decoder only; twice
4738    ALU    : S3(2);     // any 2 alus
4739%}
4740
4741// Integer ALU reg-reg operation
4742pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4743    single_instruction;
4744    dst    : S4(write);
4745    src    : S3(read);
4746    DECODE : S0;        // any decoder
4747    ALU    : S3;        // any alu
4748%}
4749
4750// Long ALU reg-reg operation
4751pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4752    instruction_count(2);
4753    dst    : S4(write);
4754    src    : S3(read);
4755    DECODE : S0(2);     // any 2 decoders
4756    ALU    : S3(2);     // both alus
4757%}
4758
4759// Integer ALU reg-reg operation using big decoder
// NOTE(review): src is declared as 'memory' although the class is named
// reg_reg and reserves no MEM unit — confirm this is intended.
4760pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4761    single_instruction;
4762    dst    : S4(write);
4763    src    : S3(read);
4764    D0     : S0;        // big decoder only
4765    ALU    : S3;        // any alu
4766%}
4767
4768// Long ALU reg-reg operation using big decoder
4769pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4770    instruction_count(2);
4771    dst    : S4(write);
4772    src    : S3(read);
4773    D0     : S0(2);     // big decoder only; twice
4774    ALU    : S3(2);     // both alus
4775%}
4776
4777// Integer ALU reg-mem operation
4778pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4779    single_instruction;
4780    dst    : S5(write);
4781    mem    : S3(read);
4782    D0     : S0;        // big decoder only
4783    ALU    : S4;        // any alu
4784    MEM    : S3;        // any mem
4785%}
4786
4787// Long ALU reg-mem operation
4788pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4789    instruction_count(2);
4790    dst    : S5(write);
4791    mem    : S3(read);
4792    D0     : S0(2);     // big decoder only; twice
4793    ALU    : S4(2);     // any 2 alus
4794    MEM    : S3(2);     // both mems
4795%}
4796
4797// Integer mem operation (prefetch)
4798pipe_class ialu_mem(memory mem)
4799%{
4800    single_instruction;
4801    mem    : S3(read);
4802    D0     : S0;        // big decoder only
4803    MEM    : S3;        // any mem
4804%}
4805
4806// Integer Store to Memory
4807pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4808    single_instruction;
4809    mem    : S3(read);
4810    src    : S5(read);
4811    D0     : S0;        // big decoder only
4812    ALU    : S4;        // any alu
4813    MEM    : S3;
4814%}
4815
4816// Long Store to Memory
4817pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4818    instruction_count(2);
4819    mem    : S3(read);
4820    src    : S5(read);
4821    D0     : S0(2);     // big decoder only; twice
4822    ALU    : S4(2);     // any 2 alus
4823    MEM    : S3(2);     // Both mems
4824%}
4825
4826// Integer Store to Memory (no register source operand)
4827pipe_class ialu_mem_imm(memory mem) %{
4828    single_instruction;
4829    mem    : S3(read);
4830    D0     : S0;        // big decoder only
4831    ALU    : S4;        // any alu
4832    MEM    : S3;
4833%}
4834
4835// Integer ALU0 reg-reg operation
4836pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4837    single_instruction;
4838    dst    : S4(write);
4839    src    : S3(read);
4840    D0     : S0;        // Big decoder only
4841    ALU0   : S3;        // only alu0
4842%}
4843
4844// Integer ALU0 reg-mem operation
4845pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4846    single_instruction;
4847    dst    : S5(write);
4848    mem    : S3(read);
4849    D0     : S0;        // big decoder only
4850    ALU0   : S4;        // ALU0 only
4851    MEM    : S3;        // any mem
4852%}
4853
4854// Integer ALU reg-reg operation that writes the flags register
4855pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4856    single_instruction;
4857    cr     : S4(write);
4858    src1   : S3(read);
4859    src2   : S3(read);
4860    DECODE : S0;        // any decoder
4861    ALU    : S3;        // any alu
4862%}
4863
4864// Integer ALU reg-imm operation that writes the flags register
4865pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4866    single_instruction;
4867    cr     : S4(write);
4868    src1   : S3(read);
4869    DECODE : S0;        // any decoder
4870    ALU    : S3;        // any alu
4871%}
4872
4873// Integer ALU reg-mem operation that writes the flags register
4874pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4875    single_instruction;
4876    cr     : S4(write);
4877    src1   : S3(read);
4878    src2   : S3(read);
4879    D0     : S0;        // big decoder only
4880    ALU    : S4;        // any alu
4881    MEM    : S3;
4882%}
4883
4884// Four-instruction sequence reading p, q and y (no written operand declared)
4885pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4886    instruction_count(4);
4887    y      : S4(read);
4888    q      : S3(read);
4889    p      : S3(read);
4890    DECODE : S0(4);     // any decoder
4891%}
4892
4893// Conditional move reg-reg
4894pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4895    single_instruction;
4896    dst    : S4(write);
4897    src    : S3(read);
4898    cr     : S3(read);
4899    DECODE : S0;        // any decoder
4900%}
4901
4902// Conditional move reg-mem
4903pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4904    single_instruction;
4905    dst    : S4(write);
4906    src    : S3(read);
4907    cr     : S3(read);
4908    DECODE : S0;        // any decoder
4909    MEM    : S3;
4910%}
4911
4912// Conditional move reg-reg long
// NOTE(review): declared single_instruction yet reserves 2 decoders — confirm.
4913pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4914    single_instruction;
4915    dst    : S4(write);
4916    src    : S3(read);
4917    cr     : S3(read);
4918    DECODE : S0(2);     // any 2 decoders
4919%}
4920
4921// Conditional move double reg-reg
4922pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4923    single_instruction;
4924    dst    : S4(write);
4925    src    : S3(read);
4926    cr     : S3(read);
4927    DECODE : S0;        // any decoder
4928%}
4929
4930// Float reg operation (dst is only read here)
4931pipe_class fpu_reg(regDPR dst) %{
4932    instruction_count(2);
4933    dst    : S3(read);
4934    DECODE : S0(2);     // any 2 decoders
4935    FPU    : S3;
4936%}
4937
4938// Float reg-reg operation
4939pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4940    instruction_count(2);
4941    dst    : S4(write);
4942    src    : S3(read);
4943    DECODE : S0(2);     // any 2 decoders
4944    FPU    : S3;
4945%}
4946
4947// Float reg-reg-reg operation
4948pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4949    instruction_count(3);
4950    dst    : S4(write);
4951    src1   : S3(read);
4952    src2   : S3(read);
4953    DECODE : S0(3);     // any 3 decoders
4954    FPU    : S3(2);
4955%}
4956
4957// Float reg-reg-reg-reg operation
4958pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4959    instruction_count(4);
4960    dst    : S4(write);
4961    src1   : S3(read);
4962    src2   : S3(read);
4963    src3   : S3(read);
4964    DECODE : S0(4);     // any decoder; 4 uses
4965    FPU    : S3(2);
4966%}
4967
4968// Float reg-mem-reg-reg operation
4969pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4970    instruction_count(4);
4971    dst    : S4(write);
4972    src1   : S3(read);
4973    src2   : S3(read);
4974    src3   : S3(read);
4975    DECODE : S1(3);     // any 3 decoders
4976    D0     : S0;        // Big decoder only
4977    FPU    : S3(2);
4978    MEM    : S3;
4979%}
4980
4981// Float reg-mem operation
4982pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4983    instruction_count(2);
4984    dst    : S5(write);
4985    mem    : S3(read);
4986    D0     : S0;        // big decoder only
4987    DECODE : S1;        // any decoder for FPU POP
4988    FPU    : S4;
4989    MEM    : S3;        // any mem
4990%}
4991
4992// Float reg-reg-mem operation
4993pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4994    instruction_count(3);
4995    dst    : S5(write);
4996    src1   : S3(read);
4997    mem    : S3(read);
4998    D0     : S0;        // big decoder only
4999    DECODE : S1(2);     // any decoder for FPU POP
5000    FPU    : S4;
5001    MEM    : S3;        // any mem
5002%}
5003
5004// Float mem-reg operation
5005pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5006    instruction_count(2);
5007    src    : S5(read);
5008    mem    : S3(read);
5009    DECODE : S0;        // any decoder for FPU PUSH
5010    D0     : S1;        // big decoder only
5011    FPU    : S4;
5012    MEM    : S3;        // any mem
5013%}
5014
// Float mem-reg-reg operation
5015pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5016    instruction_count(3);
5017    src1   : S3(read);
5018    src2   : S3(read);
5019    mem    : S3(read);
5020    DECODE : S0(2);     // any decoder for FPU PUSH
5021    D0     : S1;        // big decoder only
5022    FPU    : S4;
5023    MEM    : S3;        // any mem
5024%}
5025
// Float mem-reg-mem operation
5026pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5027    instruction_count(3);
5028    src1   : S3(read);
5029    src2   : S3(read);
5030    mem    : S4(read);
5031    DECODE : S0;        // any decoder for FPU PUSH
5032    D0     : S0(2);     // big decoder only
5033    FPU    : S4;
5034    MEM    : S3(2);     // any mem
5035%}
5036
// Float mem-mem operation (no FPU unit reserved)
5037pipe_class fpu_mem_mem(memory dst, memory src1) %{
5038    instruction_count(2);
5039    src1   : S3(read);
5040    dst    : S4(read);
5041    D0     : S0(2);     // big decoder only
5042    MEM    : S3(2);     // any mem
5043%}
5044
// Float mem-mem-mem operation
5045pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5046    instruction_count(3);
5047    src1   : S3(read);
5048    src2   : S3(read);
5049    dst    : S4(read);
5050    D0     : S0(3);     // big decoder only
5051    FPU    : S4;
5052    MEM    : S3(3);     // any mem
5053%}
5054
// Float mem-reg operation with a constant (presumably loaded from memory,
// given the two MEM uses — TODO confirm)
5055pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5056    instruction_count(3);
5057    src1   : S4(read);
5058    mem    : S4(read);
5059    DECODE : S0;        // any decoder for FPU PUSH
5060    D0     : S0(2);     // big decoder only
5061    FPU    : S4;
5062    MEM    : S3(2);     // any mem
5063%}
5064
5065// Float load constant
5066pipe_class fpu_reg_con(regDPR dst) %{
5067    instruction_count(2);
5068    dst    : S5(write);
5069    D0     : S0;        // big decoder only for the load
5070    DECODE : S1;        // any decoder for FPU POP
5071    FPU    : S4;
5072    MEM    : S3;        // any mem
5073%}
5074
5075// Float load constant combined with a register source
5076pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5077    instruction_count(3);
5078    dst    : S5(write);
5079    src    : S3(read);
5080    D0     : S0;        // big decoder only for the load
5081    DECODE : S1(2);     // any decoder for FPU POP
5082    FPU    : S4;
5083    MEM    : S3;        // any mem
5084%}
5085
5086// UnConditional branch
5087pipe_class pipe_jmp( label labl ) %{
5088    single_instruction;
5089    BR   : S3;
5090%}
5091
5092// Conditional branch
5093pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5094    single_instruction;
5095    cr    : S1(read);
5096    BR    : S3;
5097%}
5098
5099// Allocation idiom
5100pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5101    instruction_count(1); force_serialization;
5102    fixed_latency(6);
5103    heap_ptr : S3(read);
5104    DECODE   : S0(3);
5105    D0       : S2;
5106    MEM      : S3;
5107    ALU      : S3(2);
5108    dst      : S5(write);
5109    BR       : S5;
5110%}
5111
5112// Generic big/slow expanded idiom
5113pipe_class pipe_slow(  ) %{
5114    instruction_count(10); multiple_bundles; force_serialization;
5115    fixed_latency(100);
5116    D0  : S0(2);
5117    MEM : S3(2);
5118%}
5119
5120// The real do-nothing guy
5121pipe_class empty( ) %{
5122    instruction_count(0);
5123%}
5124
5125// Define the class for the Nop node
5126define %{
5127   MachNop = empty;
5128%}
5129
5130%}
5131
5132//----------INSTRUCTIONS-------------------------------------------------------
5133//
5134// match      -- States which machine-independent subtree may be replaced
5135//               by this instruction.
5136// ins_cost   -- The estimated cost of this instruction is used by instruction
5137//               selection to identify a minimum cost tree of machine
5138//               instructions that matches a tree of machine-independent
5139//               instructions.
5140// format     -- A string providing the disassembly for this instruction.
5141//               The value of an instruction's operand may be inserted
5142//               by referring to it with a '$' prefix.
5143// opcode     -- Three instruction opcodes may be provided.  These are referred
5144//               to within an encode class as $primary, $secondary, and $tertiary
5145//               respectively.  The primary opcode is commonly used to
5146//               indicate the type of machine instruction, while secondary
5147//               and tertiary are often used for prefix options or addressing
5148//               modes.
5149// ins_encode -- A list of encode classes with parameters. The encode class
5150//               name must have been defined in an 'enc_class' specification
5151//               in the encode section of the architecture description.
5152
5153//----------BSWAP-Instruction--------------------------------------------------
// ReverseBytesI: reverses the byte order of an int register in place with a
// single BSWAP (opcode bytes 0x0F, 0xC8 combined with the register).
5154instruct bytes_reverse_int(rRegI dst) %{
5155  match(Set dst (ReverseBytesI dst));
5156
5157  format %{ "BSWAP  $dst" %}
5158  opcode(0x0F, 0xC8);
5159  ins_encode( OpcP, OpcSReg(dst) );
5160  ins_pipe( ialu_reg );
5161%}
5162
// ReverseBytesL: reverses the byte order of a long register pair in place.
// Per the format, each 32-bit half is byte-swapped and the two halves are
// then exchanged.  The encoding is delegated to the bswap_long_bytes encode
// class (defined elsewhere in this file).
5163instruct bytes_reverse_long(eRegL dst) %{
5164  match(Set dst (ReverseBytesL dst));
5165
5166  format %{ "BSWAP  $dst.lo\n\t"
5167            "BSWAP  $dst.hi\n\t"
5168            "XCHG   $dst.lo $dst.hi" %}
5169
5170  ins_cost(125);
5171  ins_encode( bswap_long_bytes(dst) );
5172  ins_pipe( ialu_reg_reg);
5173%}
5174
// ReverseBytesUS: byte-reverses the low 16 bits of dst in place and
// zero-extends the result.  The flags register is declared killed.
5175instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5176  match(Set dst (ReverseBytesUS dst));
5177  effect(KILL cr);
5178
5179  format %{ "BSWAP  $dst\n\t"
5180            "SHR    $dst,16\n\t" %}
5181  ins_encode %{
    // A full 32-bit swap leaves the reversed low half in the upper 16 bits ...
5182    __ bswapl($dst$$Register);
    // ... and a logical right shift brings it down, filling with zeros.
5183    __ shrl($dst$$Register, 16);
5184  %}
5185  ins_pipe( ialu_reg );
5186%}
5187
// ReverseBytesS: byte-reverses the low 16 bits of dst in place and
// sign-extends the result.  The flags register is declared killed.
5188instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5189  match(Set dst (ReverseBytesS dst));
5190  effect(KILL cr);
5191
5192  format %{ "BSWAP  $dst\n\t"
5193            "SAR    $dst,16\n\t" %}
5194  ins_encode %{
    // A full 32-bit swap leaves the reversed low half in the upper 16 bits ...
5195    __ bswapl($dst$$Register);
    // ... and an arithmetic right shift brings it down, replicating the sign.
5196    __ sarl($dst$$Register, 16);
5197  %}
5198  ins_pipe( ialu_reg );
5199%}
5200
5201
5202//---------- Zeros Count Instructions ------------------------------------------
5203
// CountLeadingZerosI using a single LZCNT; selected only when
// UseCountLeadingZerosInstruction is set.  Flags are declared killed.
5204instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5205  predicate(UseCountLeadingZerosInstruction);
5206  match(Set dst (CountLeadingZerosI src));
5207  effect(KILL cr);
5208
5209  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5210  ins_encode %{
5211    __ lzcntl($dst$$Register, $src$$Register);
5212  %}
5213  ins_pipe(ialu_reg);
5214%}
5215
// CountLeadingZerosI fallback built on BSR; selected when
// UseCountLeadingZerosInstruction is not set.  Flags are declared killed.
// Result is 31 - (index of highest set bit); a zero source yields 32.
5216instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5217  predicate(!UseCountLeadingZerosInstruction);
5218  match(Set dst (CountLeadingZerosI src));
5219  effect(KILL cr);
5220
5221  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5222            "JNZ    skip\n\t"
5223            "MOV    $dst, -1\n"
5224      "skip:\n\t"
5225            "NEG    $dst\n\t"
5226            "ADD    $dst, 31" %}
5227  ins_encode %{
5228    Register Rdst = $dst$$Register;
5229    Register Rsrc = $src$$Register;
5230    Label skip;
5231    __ bsrl(Rdst, Rsrc);
    // BSR sets ZF when the source is zero; in that case substitute -1 so the
    // negate/add below produces 1 + 31 = 32.
5232    __ jccb(Assembler::notZero, skip);
5233    __ movl(Rdst, -1);
5234    __ bind(skip);
    // dst = (BitsPerInt - 1) - bit index
5235    __ negl(Rdst);
5236    __ addl(Rdst, BitsPerInt - 1);
5237  %}
5238  ins_pipe(ialu_reg);
5239%}
5240
// CountLeadingZerosL on a long register pair using LZCNT; selected only when
// UseCountLeadingZerosInstruction is set.  dst is declared TEMP (it is written
// before $src.lo is read) and flags are declared killed.
5241instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5242  predicate(UseCountLeadingZerosInstruction);
5243  match(Set dst (CountLeadingZerosL src));
5244  effect(TEMP dst, KILL cr);
5245
5246  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5247            "JNC    done\n\t"
5248            "LZCNT  $dst, $src.lo\n\t"
5249            "ADD    $dst, 32\n"
5250      "done:" %}
5251  ins_encode %{
5252    Register Rdst = $dst$$Register;
5253    Register Rsrc = $src$$Register;
5254    Label done;
5255    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
    // LZCNT sets CF when its source is zero; carry clear means the high word
    // had a set bit and its count is already the answer.
5256    __ jccb(Assembler::carryClear, done);
    // High word was zero: count in the low word and add the 32 high bits.
5257    __ lzcntl(Rdst, Rsrc);
5258    __ addl(Rdst, BitsPerInt);
5259    __ bind(done);
5260  %}
5261  ins_pipe(ialu_reg);
5262%}
5263
// CountLeadingZerosL fallback built on BSR; selected when
// UseCountLeadingZerosInstruction is not set.  dst is declared TEMP and flags
// are declared killed.  Result is 63 - (index of highest set bit in the
// 64-bit value); a zero source yields 64.
5264instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5265  predicate(!UseCountLeadingZerosInstruction);
5266  match(Set dst (CountLeadingZerosL src));
5267  effect(TEMP dst, KILL cr);
5268
5269  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5270            "JZ     msw_is_zero\n\t"
5271            "ADD    $dst, 32\n\t"
5272            "JMP    not_zero\n"
5273      "msw_is_zero:\n\t"
5274            "BSR    $dst, $src.lo\n\t"
5275            "JNZ    not_zero\n\t"
5276            "MOV    $dst, -1\n"
5277      "not_zero:\n\t"
5278            "NEG    $dst\n\t"
5279            "ADD    $dst, 63\n" %}
5280 ins_encode %{
5281    Register Rdst = $dst$$Register;
5282    Register Rsrc = $src$$Register;
5283    Label msw_is_zero;
5284    Label not_zero;
5285    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5286    __ jccb(Assembler::zero, msw_is_zero);
    // Bit found in the high word: bias its index by 32 to get the 64-bit index.
5287    __ addl(Rdst, BitsPerInt);
5288    __ jmpb(not_zero);
5289    __ bind(msw_is_zero);
5290    __ bsrl(Rdst, Rsrc);
5291    __ jccb(Assembler::notZero, not_zero);
    // Both words are zero: use -1 so the negate/add below produces 64.
5292    __ movl(Rdst, -1);
5293    __ bind(not_zero);
    // dst = (BitsPerLong - 1) - bit index
5294    __ negl(Rdst);
5295    __ addl(Rdst, BitsPerLong - 1);
5296  %}
5297  ins_pipe(ialu_reg);
5298%}
5299
// CountTrailingZerosI using a single TZCNT; selected only when
// UseCountTrailingZerosInstruction is set.  Flags are declared killed.
5300instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5301  predicate(UseCountTrailingZerosInstruction);
5302  match(Set dst (CountTrailingZerosI src));
5303  effect(KILL cr);
5304
5305  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5306  ins_encode %{
5307    __ tzcntl($dst$$Register, $src$$Register);
5308  %}
5309  ins_pipe(ialu_reg);
5310%}
5311
// CountTrailingZerosI fallback built on BSF; selected when
// UseCountTrailingZerosInstruction is not set.  Flags are declared killed.
// Result is the index of the lowest set bit; a zero source yields 32.
5312instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5313  predicate(!UseCountTrailingZerosInstruction);
5314  match(Set dst (CountTrailingZerosI src));
5315  effect(KILL cr);
5316
5317  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5318            "JNZ    done\n\t"
5319            "MOV    $dst, 32\n"
5320      "done:" %}
5321  ins_encode %{
5322    Register Rdst = $dst$$Register;
5323    Label done;
5324    __ bsfl(Rdst, $src$$Register);
    // BSF sets ZF when the source is zero; only then load the constant 32.
5325    __ jccb(Assembler::notZero, done);
5326    __ movl(Rdst, BitsPerInt);
5327    __ bind(done);
5328  %}
5329  ins_pipe(ialu_reg);
5330%}
5331
// CountTrailingZerosL on a long register pair using TZCNT; selected only when
// UseCountTrailingZerosInstruction is set.  dst is declared TEMP (it is
// written before $src.hi is read) and flags are declared killed.
5332instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5333  predicate(UseCountTrailingZerosInstruction);
5334  match(Set dst (CountTrailingZerosL src));
5335  effect(TEMP dst, KILL cr);
5336
5337  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5338            "JNC    done\n\t"
5339            "TZCNT  $dst, $src.hi\n\t"
5340            "ADD    $dst, 32\n"
5341            "done:" %}
5342  ins_encode %{
5343    Register Rdst = $dst$$Register;
5344    Register Rsrc = $src$$Register;
5345    Label done;
5346    __ tzcntl(Rdst, Rsrc);
    // TZCNT sets CF when its source is zero; carry clear means the low word
    // had a set bit and its count is already the answer.
5347    __ jccb(Assembler::carryClear, done);
    // Low word was zero: count in the high word and add the 32 low bits.
5348    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5349    __ addl(Rdst, BitsPerInt);
5350    __ bind(done);
5351  %}
5352  ins_pipe(ialu_reg);
5353%}
5354
// CountTrailingZerosL fallback built on BSF; selected when
// UseCountTrailingZerosInstruction is not set.  dst is declared TEMP and flags
// are declared killed.  Result is the index of the lowest set bit in the
// 64-bit value; a zero source yields 64.
5355instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5356  predicate(!UseCountTrailingZerosInstruction);
5357  match(Set dst (CountTrailingZerosL src));
5358  effect(TEMP dst, KILL cr);
5359
5360  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5361            "JNZ    done\n\t"
5362            "BSF    $dst, $src.hi\n\t"
5363            "JNZ    msw_not_zero\n\t"
5364            "MOV    $dst, 32\n"
5365      "msw_not_zero:\n\t"
5366            "ADD    $dst, 32\n"
5367      "done:" %}
5368  ins_encode %{
5369    Register Rdst = $dst$$Register;
5370    Register Rsrc = $src$$Register;
5371    Label msw_not_zero;
5372    Label done;
5373    __ bsfl(Rdst, Rsrc);
    // A set bit in the low word gives the answer directly.
5374    __ jccb(Assembler::notZero, done);
5375    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5376    __ jccb(Assembler::notZero, msw_not_zero);
    // Both words are zero: start from 32 so the add below produces 64.
5377    __ movl(Rdst, BitsPerInt);
5378    __ bind(msw_not_zero);
    // Account for the 32 zero bits of the low word.
5379    __ addl(Rdst, BitsPerInt);
5380    __ bind(done);
5381  %}
5382  ins_pipe(ialu_reg);
5383%}
5384
5385
5386//---------- Population Count Instructions -------------------------------------
5387
// Population count of a 32-bit register operand via POPCNT.
// Guarded by UsePopCountInstruction; flags are declared killed.
instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI src));
  effect(KILL cr);

  format %{ "POPCNT $dst, $src" %}
  ins_encode %{
    Register Rcount = $dst$$Register;
    Register Rbits  = $src$$Register;
    __ popcntl(Rcount, Rbits);
  %}
  ins_pipe(ialu_reg);
%}
5399
// Population count of a 32-bit value taken straight from memory: folds the
// LoadI into the POPCNT memory operand.
instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountI (LoadI mem)));
  effect(KILL cr);

  format %{ "POPCNT $dst, $mem" %}
  ins_encode %{
    Register Rcount = $dst$$Register;
    __ popcntl(Rcount, $mem$$Address);
  %}
  ins_pipe(ialu_reg);
%}
5411
// Population count of a 64-bit long in a register pair.
// Note: Long.bitCount(long) returns an int, hence the rRegI destination.
// The two 32-bit halves are counted separately and summed; tmp and dst are
// TEMP because both are written while src is still being read.
instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL src));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $src.lo\n\t"
            "POPCNT $tmp, $src.hi\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    Register Rsum     = $dst$$Register;
    Register Rscratch = $tmp$$Register;
    Register Rlow     = $src$$Register;
    __ popcntl(Rsum, Rlow);
    __ popcntl(Rscratch, HIGH_FROM_LOW(Rlow));
    __ addl(Rsum, Rscratch);
  %}
  ins_pipe(ialu_reg);
%}
5428
// Population count of a 64-bit long read directly from memory.
// Note: Long.bitCount(long) returns an int.
// The low word at $mem and the high word at $mem+4 are counted separately
// and summed.
instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
  predicate(UsePopCountInstruction);
  match(Set dst (PopCountL (LoadL mem)));
  effect(KILL cr, TEMP tmp, TEMP dst);

  format %{ "POPCNT $dst, $mem\n\t"
            "POPCNT $tmp, $mem+4\n\t"
            "ADD    $dst, $tmp" %}
  ins_encode %{
    Register Rsum     = $dst$$Register;
    Register Rscratch = $tmp$$Register;
    // Raw addresses of the two 32-bit halves; no relocation is attached.
    Address Alow  = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
    Address Ahigh = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
    __ popcntl(Rsum, Alow);
    __ popcntl(Rscratch, Ahigh);
    __ addl(Rsum, Rscratch);
  %}
  ins_pipe(ialu_reg);
%}
5447
5448
5449//----------Load/Store/Move Instructions---------------------------------------
5450//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed): sign-extending byte load into an int register.
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem\t# byte" %}

  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movsbl(Rvalue, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5464
// Load Byte (8bit signed) into Long Register.
// The low word gets the sign-extended byte; the high word is a copy of it
// shifted arithmetically so that it consists only of sign bits.
instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadB mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,7" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);  // always a different register than Rlow
    __ movsbl(Rlow, $mem$$Address);
    __ movl(Rhigh, Rlow);
    // The top 24+1 bits are already sign bits, so a shift by 7 is enough.
    __ sarl(Rhigh, 7);
  %}

  ins_pipe(ialu_reg_mem);
%}
5483
// Load Unsigned Byte (8bit UNsigned): zero-extending byte load into an int
// register.
instruct loadUB(xRegI dst, memory mem) %{
  match(Set dst (LoadUB mem));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}

  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movzbl(Rvalue, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5497
// Load Unsigned Byte (8 bit UNsigned) into Long Register.
// Zero-extend into the low word and clear the high word.
instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUB mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzbl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}

  ins_pipe(ialu_reg_mem);
%}
5515
// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register.
// Only the low 8 bits of the 32-bit mask can matter after a zero-extending
// byte load, so the mask is truncated to those bits before the AND.
instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 8)" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzbl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
    __ andl(Rlow, $mask$$constant & right_n_bits(8));
  %}
  ins_pipe(ialu_reg_mem);
%}
5532
// Load Short (16bit signed): sign-extending 16-bit load into an int register.
instruct loadS(rRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem\t# short" %}

  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movswl(Rvalue, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5546
// Load Short (16 bit signed) to Byte (8 bit signed).
// Matches a left-then-right shift by the immI_24 operand applied to a loaded
// short and replaces the whole pattern with one sign-extending byte load
// from the same address.
instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));

  ins_cost(125);
  // Printed as MOVSX8 (this file's spelling for the 8-bit form, as in loadB)
  // because the encoding is movsbl; plain MOVSX denotes the 16-bit movswl.
  format %{ "MOVSX8 $dst, $mem\t# short -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5558
// Load Short (16bit signed) into Long Register.
// The low word gets the sign-extended short; the high word is a copy of it
// shifted arithmetically so that it consists only of sign bits.
instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadS mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,15" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);  // always a different register than Rlow
    __ movswl(Rlow, $mem$$Address);
    __ movl(Rhigh, Rlow);
    // The top 16+1 bits are already sign bits, so a shift by 15 is enough.
    __ sarl(Rhigh, 15);
  %}

  ins_pipe(ialu_reg_mem);
%}
5577
// Load Unsigned Short/Char (16bit unsigned): zero-extending 16-bit load into
// an int register.
instruct loadUS(rRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}

  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movzwl(Rvalue, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5591
// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed).
// Matches a left-then-right shift by the immI_24 operand applied to a loaded
// char and replaces the whole pattern with one sign-extending byte load
// from the same address.
instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));

  ins_cost(125);
  // Printed as MOVSX8 (this file's spelling for the 8-bit form, as in loadB)
  // because the encoding is movsbl; plain MOVSX denotes the 16-bit movswl.
  format %{ "MOVSX8 $dst, $mem\t# ushort -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5603
// Load Unsigned Short/Char (16 bit UNsigned) into Long Register.
// Zero-extend into the low word and clear the high word.
instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadUS mem)));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzwl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}

  ins_pipe(ialu_reg_mem);
%}
5620
// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register.
// Masking with 0xFF keeps only the low byte, so a zero-extending byte load
// replaces both the 16-bit load and the AND.
instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzbl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}
  ins_pipe(ialu_reg_mem);
%}
5635
// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register.
// Only the low 16 bits of the mask can matter after a zero-extending 16-bit
// load, so the mask is truncated to those bits before the AND.
instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,right_n_bits($mask, 16)" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzwl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
    __ andl(Rlow, $mask$$constant & right_n_bits(16));
  %}
  ins_pipe(ialu_reg_mem);
%}
5652
// Load Integer: plain 32-bit load into an int register.
instruct loadI(rRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# int" %}

  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movl(Rvalue, $mem$$Address);
  %}

  ins_pipe(ialu_reg_mem);
%}
5666
// Load Integer (32 bit signed) to Byte (8 bit signed).
// Matches a left-then-right shift by the immI_24 operand applied to a loaded
// int and replaces the whole pattern with one sign-extending byte load
// from the same address.
instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));

  ins_cost(125);
  // Printed as MOVSX8 (this file's spelling for the 8-bit form, as in loadB)
  // because the encoding is movsbl; plain MOVSX denotes the 16-bit movswl.
  format %{ "MOVSX8 $dst, $mem\t# int -> byte" %}
  ins_encode %{
    __ movsbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5678
// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned).
// Matches an AND of a loaded int with the immI_255 operand and replaces
// both with one zero-extending byte load from the same address.
instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  // Printed as MOVZX8 (this file's spelling for the 8-bit form, as in loadUB)
  // because the encoding is movzbl; plain MOVZX denotes the 16-bit movzwl.
  format %{ "MOVZX8 $dst, $mem\t# int -> ubyte" %}
  ins_encode %{
    __ movzbl($dst$$Register, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5690
// Load Integer (32 bit signed) to Short (16 bit signed).
// Matches a left-then-right shift by the immI_16 operand applied to a loaded
// int and replaces the pattern with one sign-extending 16-bit load.
instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));

  ins_cost(125);
  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movswl(Rvalue, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5702
// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned).
// Matches an AND of a loaded int with the immI_65535 operand and replaces
// both with one zero-extending 16-bit load.
instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
  match(Set dst (AndI (LoadI mem) mask));

  ins_cost(125);
  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
  ins_encode %{
    Register Rvalue = $dst$$Register;
    __ movzwl(Rvalue, $mem$$Address);
  %}
  ins_pipe(ialu_reg_mem);
%}
5714
// Load Integer into Long Register.
// The low word is the loaded int; the high word is a copy shifted
// arithmetically by 31 so that every bit equals the sign bit.
instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
  match(Set dst (ConvI2L (LoadI mem)));
  effect(KILL cr);

  ins_cost(375);
  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "SAR    $dst.hi,31" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);  // always a different register than Rlow
    __ movl(Rlow, $mem$$Address);
    __ movl(Rhigh, Rlow);
    __ sarl(Rhigh, 31);
  %}

  ins_pipe(ialu_reg_mem);
%}
5733
// Load Integer with mask 0xFF into Long Register.
// The masked result fits in a byte, so a zero-extending byte load replaces
// the 32-bit load and the AND; the high word is cleared.
instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzbl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}
  ins_pipe(ialu_reg_mem);
%}
5748
// Load Integer with mask 0xFFFF into Long Register.
// The masked result fits in 16 bits, so a zero-extending 16-bit load replaces
// the 32-bit load and the AND; the high word is cleared.
instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movzwl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}
  ins_pipe(ialu_reg_mem);
%}
5763
// Load Integer with 31-bit mask into Long Register.
// The immU31 mask operand makes the masked value non-negative, so the high
// word is simply cleared instead of being sign-extended.
instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
  effect(KILL cr);

  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
            "XOR    $dst.hi,$dst.hi\n\t"
            "AND    $dst.lo,$mask" %}
  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
    __ andl(Rlow, $mask$$constant);
  %}
  ins_pipe(ialu_reg_mem);
%}
5780
// Load Unsigned Integer into Long Register.
// Matches ConvI2L of a loaded int ANDed with the immL_32bits mask: the low
// word is the loaded value and the high word is zero.
instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
  effect(KILL cr);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
            "XOR    $dst.hi,$dst.hi" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movl(Rlow, $mem$$Address);
    __ xorl(Rhigh, Rhigh);
  %}

  ins_pipe(ialu_reg_mem);
%}
5797
// Load Long (non-atomic).  The two halves are loaded one after the other, so
// the first load must not clobber the address; the address register is
// therefore restricted to ESI (load_long_memory operand).
instruct loadL(eRegL dst, load_long_memory mem) %{
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
            "MOV    $dst.hi,$mem+4" %}

  ins_encode %{
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    // Low word at $mem, high word 4 bytes above it; no relocation attached.
    __ movl(Rlow,  Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
    __ movl(Rhigh, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
  %}

  ins_pipe(ialu_reg_long_mem);
%}
5817
5818// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5819// then store it down to the stack and reload on the int
5820// side.
// Selected when UseSSE<=1 and the LoadL node requires atomic access.  The
// result is produced in a long stack slot (stackSlotL), not a register pair.
5821instruct loadL_volatile(stackSlotL dst, memory mem) %{
5822  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5823  match(Set dst (LoadL mem));
5824
5825  ins_cost(200);
5826  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5827            "FISTp  $dst" %}
  // enc_loadL_volatile is an encoding class defined outside this section.
5828  ins_encode(enc_loadL_volatile(mem,dst));
5829  ins_pipe( fpu_reg_mem );
5830%}
5831
// Atomic (volatile) long load into a stack slot when SSE2 is available:
// one 64-bit MOVSD from memory into an XMM temp, then a 64-bit MOVSD from
// the temp to the destination slot addressed off the stack pointer.
instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode %{
    XMMRegister Xscratch = $tmp$$XMMRegister;
    __ movdbl(Xscratch, $mem$$Address);
    __ movdbl(Address(rsp, $dst$$disp), Xscratch);
  %}
  ins_pipe( pipe_slow );
%}
5845
// Atomic (volatile) long load into a register pair when SSE2 is available:
// the 64 bits are read with one MOVSD into an XMM temp, then split into the
// low and high general registers with MOVD / PSRLQ 32 / MOVD.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode %{
    XMMRegister Xscratch = $tmp$$XMMRegister;
    Register Rlow  = $dst$$Register;
    Register Rhigh = HIGH_FROM_LOW(Rlow);
    __ movdbl(Xscratch, $mem$$Address);
    __ movdl(Rlow, Xscratch);
    // Bring the upper 32 bits down so the second MOVD can extract them.
    __ psrlq(Xscratch, 32);
    __ movdl(Rhigh, Xscratch);
  %}
  ins_pipe( pipe_slow );
%}
5863
5864// Load Range
// Matches the ideal LoadRange node and emits a plain 32-bit MOV from $mem
// into an int register.
5865instruct loadRange(rRegI dst, memory mem) %{
5866  match(Set dst (LoadRange mem));
5867
5868  ins_cost(125);
5869  format %{ "MOV    $dst,$mem" %}
5870  opcode(0x8B);  // MOV r32, r/m32
5871  ins_encode( OpcP, RegMem(dst,mem));
5872  ins_pipe( ialu_reg_mem );
5873%}
5874
5875
5876// Load Pointer
// Matches LoadP and emits a plain 32-bit MOV from $mem into a pointer register.
5877instruct loadP(eRegP dst, memory mem) %{
5878  match(Set dst (LoadP mem));
5879
5880  ins_cost(125);
5881  format %{ "MOV    $dst,$mem" %}
5882  opcode(0x8B);  // MOV r32, r/m32
5883  ins_encode( OpcP, RegMem(dst,mem));
5884  ins_pipe( ialu_reg_mem );
5885%}
5886
5887// Load Klass Pointer
// Matches LoadKlass; the encoding is the same 32-bit MOV used by loadP.
5888instruct loadKlass(eRegP dst, memory mem) %{
5889  match(Set dst (LoadKlass mem));
5890
5891  ins_cost(125);
5892  format %{ "MOV    $dst,$mem" %}
5893  opcode(0x8B);  // MOV r32, r/m32
5894  ins_encode( OpcP, RegMem(dst,mem));
5895  ins_pipe( ialu_reg_mem );
5896%}
5897
5898// Load Double
// x87 variant, selected when UseSSE<=1: push the 64-bit value at $mem onto
// the FPU stack (FLD), then pop it into the allocated FPU register (FSTP).
5899instruct loadDPR(regDPR dst, memory mem) %{
5900  predicate(UseSSE<=1);
5901  match(Set dst (LoadD mem));
5902
5903  ins_cost(150);
5904  format %{ "FLD_D  ST,$mem\n\t"
5905            "FSTP   $dst" %}
5906  opcode(0xDD);               /* DD /0 */
  // RMopc_Mem(0x00,mem) supplies the /0 opcode extension together with $mem.
5907  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5908              Pop_Reg_DPR(dst) );
5909  ins_pipe( fpu_reg_mem );
5910%}
5911
// Load Double to XMM.
// Variant for UseSSE>=2 with UseXmmLoadAndClearUpper set; printed as MOVSD.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode %{
    XMMRegister Xvalue = $dst$$XMMRegister;
    __ movdbl(Xvalue, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5923
// Load Double to XMM, variant for UseSSE>=2 with UseXmmLoadAndClearUpper
// cleared; printed as MOVLPD.
// NOTE(review): the encoding calls the same movdbl() helper as loadD, so the
// helper presumably picks the instruction from the flag -- confirm in the
// macro assembler.
instruct loadD_partial(regD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode %{
    XMMRegister Xvalue = $dst$$XMMRegister;
    __ movdbl(Xvalue, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5934
// Load Float to XMM register (single-precision floating point).
// MOVSS instruction; selected when UseSSE>=1.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode %{
    XMMRegister Xvalue = $dst$$XMMRegister;
    __ movflt(Xvalue, $mem$$Address);
  %}
  ins_pipe( pipe_slow );
%}
5947
5948// Load Float
// x87 variant, selected only when UseSSE==0: push the 32-bit value at $mem
// onto the FPU stack (FLD), then pop it into the allocated FPU register.
5949instruct loadFPR(regFPR dst, memory mem) %{
5950  predicate(UseSSE==0);
5951  match(Set dst (LoadF mem));
5952
5953  ins_cost(150);
5954  format %{ "FLD_S  ST,$mem\n\t"
5955            "FSTP   $dst" %}
5956  opcode(0xD9);               /* D9 /0 */
  // RMopc_Mem(0x00,mem) supplies the /0 opcode extension together with $mem.
5957  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5958              Pop_Reg_FPR(dst) );
5959  ins_pipe( fpu_reg_mem );
5960%}
5961
5962// Load Effective Address
// Materializes the address described by an indOffset8 operand into a pointer
// register with LEA.  The match rule is the operand itself, not a load.
5963instruct leaP8(eRegP dst, indOffset8 mem) %{
5964  match(Set dst mem);
5965
5966  ins_cost(110);
5967  format %{ "LEA    $dst,$mem" %}
5968  opcode(0x8D);  // LEA r32, m
5969  ins_encode( OpcP, RegMem(dst,mem));
5970  ins_pipe( ialu_reg_reg_fat );
5971%}
5972
// Load Effective Address for an indOffset32 operand: same LEA encoding as
// leaP8, differing only in the address operand class that is matched.
5973instruct leaP32(eRegP dst, indOffset32 mem) %{
5974  match(Set dst mem);
5975
5976  ins_cost(110);
5977  format %{ "LEA    $dst,$mem" %}
5978  opcode(0x8D);  // LEA r32, m
5979  ins_encode( OpcP, RegMem(dst,mem));
5980  ins_pipe( ialu_reg_reg_fat );
5981%}
5982
// Load Effective Address for an indIndexOffset operand; same LEA encoding as
// the other lea* rules in this group.
5983instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5984  match(Set dst mem);
5985
5986  ins_cost(110);
5987  format %{ "LEA    $dst,$mem" %}
5988  opcode(0x8D);  // LEA r32, m
5989  ins_encode( OpcP, RegMem(dst,mem));
5990  ins_pipe( ialu_reg_reg_fat );
5991%}
5992
// Load Effective Address for an indIndexScale operand; same LEA encoding as
// the other lea* rules in this group.
5993instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5994  match(Set dst mem);
5995
5996  ins_cost(110);
5997  format %{ "LEA    $dst,$mem" %}
5998  opcode(0x8D);  // LEA r32, m
5999  ins_encode( OpcP, RegMem(dst,mem));
6000  ins_pipe( ialu_reg_reg_fat );
6001%}
6002
// Load Effective Address for an indIndexScaleOffset operand; same LEA
// encoding as the other lea* rules in this group.
6003instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6004  match(Set dst mem);
6005
6006  ins_cost(110);
6007  format %{ "LEA    $dst,$mem" %}
6008  opcode(0x8D);  // LEA r32, m
6009  ins_encode( OpcP, RegMem(dst,mem));
6010  ins_pipe( ialu_reg_reg_fat );
6011%}
6012
6013// Load Constant
// Generic 32-bit immediate load into an int register.  No flags effect is
// declared, unlike the XOR-based zero rule (loadConI0).
6014instruct loadConI(rRegI dst, immI src) %{
6015  match(Set dst src);
6016
6017  format %{ "MOV    $dst,$src" %}
  // LdImmI is an encoding class defined outside this section.
6018  ins_encode( LdImmI(dst, src) );
6019  ins_pipe( ialu_reg_fat );
6020%}
6021
6022// Load Constant zero
// Zeroes an int register with XOR dst,dst.  XOR modifies the condition
// codes, hence KILL cr.  Its explicit ins_cost(50) applies only to the
// immI0 (zero) operand.
6023instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
6024  match(Set dst src);
6025  effect(KILL cr);
6026
6027  ins_cost(50);
6028  format %{ "XOR    $dst,$dst" %}
6029  opcode(0x33);  /* + rd */
6030  ins_encode( OpcP, RegReg( dst, dst ) );
6031  ins_pipe( ialu_reg );
6032%}
6033
// Load a pointer constant (immP) into a pointer register with MOV.
// LdImmP is an encoding class defined outside this section.
6034instruct loadConP(eRegP dst, immP src) %{
6035  match(Set dst src);
6036
6037  format %{ "MOV    $dst,$src" %}
6038  opcode(0xB8);  /* + rd */
6039  ins_encode( LdImmP(dst, src) );
6040  ins_pipe( ialu_reg_fat );
6041%}
6042
// Load a 64-bit long constant into a register pair as two 32-bit immediate
// moves, low half first (LdImmL_Lo) then high half (LdImmL_Hi).
6043instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6044  match(Set dst src);
6045  effect(KILL cr);
6046  ins_cost(200);
6047  format %{ "MOV    $dst.lo,$src.lo\n\t"
6048            "MOV    $dst.hi,$src.hi" %}
6049  opcode(0xB8);
6050  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6051  ins_pipe( ialu_reg_long_fat );
6052%}
6053
// Load the long constant zero: XOR each half of the register pair with
// itself.  Its ins_cost(150) is lower than loadConL's 200; XOR modifies the
// condition codes, hence KILL cr.
6054instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6055  match(Set dst src);
6056  effect(KILL cr);
6057  ins_cost(150);
6058  format %{ "XOR    $dst.lo,$dst.lo\n\t"
6059            "XOR    $dst.hi,$dst.hi" %}
6060  opcode(0x33,0x33);
6061  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6062  ins_pipe( ialu_reg_long );
6063%}
6064
// Load a float constant from the constant table into an x87 register:
// push it with FLD, then pop it into the allocated FPU register.
// The instruction usage is guarded by predicate in operand immFPR().
instruct loadConFPR(regFPR dst, immFPR con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fld_s($constantaddress($con));
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6077
// Load float 0.0 into an x87 register: FLDZ pushes zero, FSTP pops it into
// the allocated FPU register.
// The instruction usage is guarded by predicate in operand immFPR0().
instruct loadConFPR0(regFPR dst, immFPR0 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fldz();
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6090
// Load float 1.0 into an x87 register: FLD1 pushes one, FSTP pops it into
// the allocated FPU register.
// The instruction usage is guarded by predicate in operand immFPR1().
instruct loadConFPR1(regFPR dst, immFPR1 con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fld1();
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6103
// Load a float constant from the constant table into an XMM register.
// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
  ins_encode %{
    XMMRegister Xvalue = $dst$$XMMRegister;
    __ movflt(Xvalue, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
6114
// Load float 0.0 into an XMM register by XORing the register with itself;
// no constant-table access is needed.
// The instruction usage is guarded by predicate in operand immF0().
instruct loadConF0(regF dst, immF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode %{
    XMMRegister Xzero = $dst$$XMMRegister;
    __ xorps(Xzero, Xzero);
  %}
  ins_pipe(pipe_slow);
%}
6125
// Load a double constant from the constant table into an x87 register:
// push it with FLD, then pop it into the allocated FPU register.
// The instruction usage is guarded by predicate in operand immDPR().
instruct loadConDPR(regDPR dst, immDPR con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fld_d($constantaddress($con));
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6139
// Load double 0.0 into an x87 register: FLDZ pushes zero, FSTP pops it into
// the allocated FPU register.
// The instruction usage is guarded by predicate in operand immDPR0().
instruct loadConDPR0(regDPR dst, immDPR0 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLDZ   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fldz();
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6153
// Load double 1.0 into an x87 register: FLD1 pushes one, FSTP pops it into
// the allocated FPU register.
// The instruction usage is guarded by predicate in operand immDPR1().
instruct loadConDPR1(regDPR dst, immDPR1 con) %{
  match(Set dst con);
  ins_cost(125);

  format %{ "FLD1   ST\n\t"
            "FSTP   $dst" %}
  ins_encode %{
    const int dst_fpu_reg = $dst$$reg;
    __ fld1();
    __ fstp_d(dst_fpu_reg);
  %}
  ins_pipe(fpu_reg_con);
%}
6167
// Load a double constant from the constant table into an XMM register.
// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
  ins_encode %{
    XMMRegister Xvalue = $dst$$XMMRegister;
    __ movdbl(Xvalue, $constantaddress($con));
  %}
  ins_pipe(pipe_slow);
%}
6178
// Load double 0.0 into an XMM register by XORing the register with itself;
// no constant-table access is needed.
// The instruction usage is guarded by predicate in operand immD0().
instruct loadConD0(regD dst, immD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode %{
    XMMRegister Xzero = $dst$$XMMRegister;
    __ xorpd (Xzero, Xzero);
  %}
  ins_pipe( pipe_slow );
%}
6189
6190// Load Stack Slot
// Moves an int from a stack slot operand into an int register with a plain
// 32-bit MOV.
6191instruct loadSSI(rRegI dst, stackSlotI src) %{
6192  match(Set dst src);
6193  ins_cost(125);
6194
6195  format %{ "MOV    $dst,$src" %}
6196  opcode(0x8B);  // MOV r32, r/m32
6197  ins_encode( OpcP, RegMem(dst,src));
6198  ins_pipe( ialu_reg_mem );
6199%}
6200
// Load Stack Slot (long): moves a 64-bit value from a long stack slot into a
// register pair as two 32-bit MOVs, low half first (RegMem) then high half
// (RegMem_Hi).
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  // dst is the register pair and src the memory slot, so the halves are
  // printed as $dst.lo/$dst.hi and $src/$src+4, matching loadL's notation.
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  // NOTE(review): this is the pipeline class storeL uses; loadL uses
  // ialu_reg_long_mem.  Left unchanged here -- confirm which is intended.
  ins_pipe( ialu_mem_long_reg );
%}
6211
6212// Load Stack Slot
// Moves a pointer from a stack slot operand into a pointer register with a
// plain 32-bit MOV.
6213instruct loadSSP(eRegP dst, stackSlotP src) %{
6214  match(Set dst src);
6215  ins_cost(125);
6216
6217  format %{ "MOV    $dst,$src" %}
6218  opcode(0x8B);  // MOV r32, r/m32
6219  ins_encode( OpcP, RegMem(dst,src));
6220  ins_pipe( ialu_reg_mem );
6221%}
6222
6223// Load Stack Slot
// Moves a float from a stack slot into an x87 register: FLD pushes the
// 32-bit value, then it is popped into the allocated FPU register.
6224instruct loadSSF(regFPR dst, stackSlotF src) %{
6225  match(Set dst src);
6226  ins_cost(125);
6227
6228  format %{ "FLD_S  $src\n\t"
6229            "FSTP   $dst" %}
6230  opcode(0xD9);               /* D9 /0, FLD m32real */
  // Uses the _no_oop memory encoding class (defined outside this section),
  // unlike the RMopc_Mem used by loadFPR.
6231  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6232              Pop_Reg_FPR(dst) );
6233  ins_pipe( fpu_reg_mem );
6234%}
6235
6236// Load Stack Slot
// Moves a double from a stack slot into an x87 register: FLD pushes the
// 64-bit value, then it is popped into the allocated FPU register.
6237instruct loadSSD(regDPR dst, stackSlotD src) %{
6238  match(Set dst src);
6239  ins_cost(125);
6240
6241  format %{ "FLD_D  $src\n\t"
6242            "FSTP   $dst" %}
6243  opcode(0xDD);               /* DD /0, FLD m64real */
  // Uses the _no_oop memory encoding class (defined outside this section),
  // unlike the RMopc_Mem used by loadDPR.
6244  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6245              Pop_Reg_DPR(dst) );
6246  ins_pipe( fpu_reg_mem );
6247%}
6248
6249// Prefetch instructions for allocation.
6250// Must be safe to execute with invalid address (cannot fault).
6251
// Allocation prefetch when no SSE is available and PREFETCHW was not
// requested (AllocatePrefetchInstr!=3): the node is matched but nothing is
// emitted, so it has zero cost and zero size.
6252instruct prefetchAlloc0( memory mem ) %{
6253  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6254  match(PrefetchAllocation mem);
6255  ins_cost(0);
6256  size(0);
6257  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6258  ins_encode();
6259  ins_pipe(empty);
6260%}
6261
// Allocation prefetch using PREFETCHW, selected by AllocatePrefetchInstr==3.
// Unlike the other prefetch variants, the predicate does not test UseSSE.
instruct prefetchAlloc( memory mem ) %{
  predicate(AllocatePrefetchInstr==3);
  match( PrefetchAllocation mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetchw(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6273
// Allocation prefetch using PREFETCHNTA, selected when SSE is available and
// AllocatePrefetchInstr==0.
instruct prefetchAllocNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetchnta(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6285
// Allocation prefetch using PREFETCHT0, selected when SSE is available and
// AllocatePrefetchInstr==1.
instruct prefetchAllocT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetcht0(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6297
// Allocation prefetch using PREFETCHT2, selected when SSE is available and
// AllocatePrefetchInstr==2.
instruct prefetchAllocT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchAllocation mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
  ins_encode %{
    Address Atarget = $mem$$Address;
    __ prefetcht2(Atarget);
  %}
  ins_pipe(ialu_mem);
%}
6309
6310//----------Store Instructions-------------------------------------------------
6311
6312// Store Byte
// Stores the low byte of src to $mem.  src is an xRegI operand (a different
// register class than the rRegI used by storeC/storeI).
6313instruct storeB(memory mem, xRegI src) %{
6314  match(Set mem (StoreB mem src));
6315
6316  ins_cost(125);
6317  format %{ "MOV8   $mem,$src" %}
6318  opcode(0x88);  // MOV r/m8, r8
6319  ins_encode( OpcP, RegMem( src, mem ) );
6320  ins_pipe( ialu_mem_reg );
6321%}
6322
6323// Store Char/Short
// 16-bit store.  The secondary opcode value 0x66 is listed first in
// ins_encode (OpcS before OpcP) so that it is emitted as a prefix ahead of
// the 0x89 MOV opcode.
6324instruct storeC(memory mem, rRegI src) %{
6325  match(Set mem (StoreC mem src));
6326
6327  ins_cost(125);
6328  format %{ "MOV16  $mem,$src" %}
6329  opcode(0x89, 0x66);  // primary: MOV r/m, r; secondary: operand-size prefix
6330  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6331  ins_pipe( ialu_mem_reg );
6332%}
6333
6334// Store Integer
// Plain 32-bit store of an int register to $mem.
6335instruct storeI(memory mem, rRegI src) %{
6336  match(Set mem (StoreI mem src));
6337
6338  ins_cost(125);
6339  format %{ "MOV    $mem,$src" %}
6340  opcode(0x89);  // MOV r/m32, r32
6341  ins_encode( OpcP, RegMem( src, mem ) );
6342  ins_pipe( ialu_mem_reg );
6343%}
6344
6345// Store Long
// Non-atomic 64-bit store (only for StoreL nodes that do not require atomic
// access): two 32-bit MOVs, low half to $mem (RegMem) and high half to
// $mem+4 (RegMem_Hi).
6346instruct storeL(long_memory mem, eRegL src) %{
6347  predicate(!((StoreLNode*)n)->require_atomic_access());
6348  match(Set mem (StoreL mem src));
6349
6350  ins_cost(200);
6351  format %{ "MOV    $mem,$src.lo\n\t"
6352            "MOV    $mem+4,$src.hi" %}
6353  opcode(0x89, 0x89);
6354  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6355  ins_pipe( ialu_mem_long_reg );
6356%}
6357
// Store Long to Integer.
// Folds ConvL2I into the store: only the low register of the long pair is
// written to $mem.
instruct storeL2I(memory mem, eRegL src) %{
  match(Set mem (StoreI mem (ConvL2I src)));

  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
  ins_encode %{
    Register Rlow = $src$$Register;
    __ movl($mem$$Address, Rlow);
  %}
  ins_pipe(ialu_mem_reg);
%}
6368
6369// Volatile Store Long.  Must be atomic, so move it into
6370// the FP TOS and then do a 64-bit FIST.  Has to probe the
6371// target address before the store (for null-ptr checks)
6372// so the memory operand is used twice in the encoding.
// Selected when UseSSE<=1 and the StoreL node requires atomic access.  The
// source is a long stack slot; flags are killed by the probing CMP.
6373instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6374  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6375  match(Set mem (StoreL mem src));
6376  effect( KILL cr );
6377  ins_cost(400);
6378  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6379            "FILD   $src\n\t"
6380            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6381  opcode(0x3B);  // CMP r32, r/m32 -- used only to touch $mem
  // enc_storeL_volatile is an encoding class defined outside this section.
6382  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6383  ins_pipe( fpu_reg_mem );
6384%}
6385
// storeLX_volatile: atomic StoreL for UseSSE>=2 with the source in a long
// stack slot. The 64-bit value is copied through the XMM temporary tmp using
// two movdbl calls, after a cmpl probe of the destination address. tmp is a
// TEMP and the flags register is killed.
6386instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6387  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6388  match(Set mem (StoreL mem src));
6389  effect( TEMP tmp, KILL cr );
6390  ins_cost(380);
6391  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6392            "MOVSD  $tmp,$src\n\t"
6393            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6394  ins_encode %{
    // Probe the destination first (the format labels this the implicit null check).
6395    __ cmpl(rax, $mem$$Address);
    // The stack-slot source is addressed as rsp + the slot displacement.
6396    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6397    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6398  %}
6399  ins_pipe( pipe_slow );
6400%}
6401
// storeLX_reg_volatile: atomic StoreL for UseSSE>=2 with the source in a
// long register pair. Each 32-bit half is moved into its own XMM temporary,
// the halves are merged with punpckldq, and the result is written with one
// movdbl. Cost 360 is lower than the stack-slot variant's 380.
6402instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6403  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6404  match(Set mem (StoreL mem src));
6405  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6406  ins_cost(360);
6407  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6408            "MOVD   $tmp,$src.lo\n\t"
6409            "MOVD   $tmp2,$src.hi\n\t"
6410            "PUNPCKLDQ $tmp,$tmp2\n\t"
6411            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6412  ins_encode %{
    // Probe the destination first (the format labels this the implicit null check).
6413    __ cmpl(rax, $mem$$Address);
    // Low half, then high half (HIGH_FROM_LOW maps the low register to its partner).
6414    __ movdl($tmp$$XMMRegister, $src$$Register);
6415    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6416    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6417    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6418  %}
6419  ins_pipe( pipe_slow );
6420%}
6421
6422// Store Pointer; for storing unknown oops and raw pointers
// storeP: matches an ideal StoreP whose value may be in any pointer register
// (anyRegP). Uses the same opcode byte (0x89) and encoder list as storeI.
6423instruct storeP(memory mem, anyRegP src) %{
6424  match(Set mem (StoreP mem src));
6425
6426  ins_cost(125);
6427  format %{ "MOV    $mem,$src" %}
6428  opcode(0x89);
6429  ins_encode( OpcP, RegMem( src, mem ) );
6430  ins_pipe( ialu_mem_reg );
6431%}
6432
6433// Store Integer Immediate
// storeImmI: StoreI of an integer immediate. Encoded as opcode 0xC7, then
// RMopc_Mem(0x00, mem) (the "/0" form noted beside the opcode), then the
// constant via Con32. Cost 150, versus 125 for the register form storeI.
6434instruct storeImmI(memory mem, immI src) %{
6435  match(Set mem (StoreI mem src));
6436
6437  ins_cost(150);
6438  format %{ "MOV    $mem,$src" %}
6439  opcode(0xC7);               /* C7 /0 */
6440  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6441  ins_pipe( ialu_mem_imm );
6442%}
6443
6444// Store Short/Char Immediate
// storeImmI16: StoreC of a 16-bit immediate, only available when the
// UseStoreImmI16 flag is set. The encoder list is the 32-bit immediate store
// with SizePrefix in front and Con16 in place of Con32.
6445instruct storeImmI16(memory mem, immI16 src) %{
6446  predicate(UseStoreImmI16);
6447  match(Set mem (StoreC mem src));
6448
6449  ins_cost(150);
6450  format %{ "MOV16  $mem,$src" %}
6451  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6452  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6453  ins_pipe( ialu_mem_imm );
6454%}
6455
6456// Store Pointer Immediate; null pointers or constant oops that do not
6457// need card-mark barriers.
// storeImmP: StoreP of a pointer immediate (immP). Opcode and encoder list
// are identical to storeImmI: 0xC7, RMopc_Mem(0x00, mem), Con32.
6458instruct storeImmP(memory mem, immP src) %{
6459  match(Set mem (StoreP mem src));
6460
6461  ins_cost(150);
6462  format %{ "MOV    $mem,$src" %}
6463  opcode(0xC7);               /* C7 /0 */
6464  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6465  ins_pipe( ialu_mem_imm );
6466%}
6467
6468// Store Byte Immediate
// storeImmB: StoreB of an 8-bit immediate (immI8), printed as MOV8.
// Encoded as opcode 0xC6 with RMopc_Mem(0x00, mem) and the constant via
// Con8or32 (presumably emitting a single byte here because the operand is
// restricted to immI8 -- confirm in the Con8or32 enc_class).
6469instruct storeImmB(memory mem, immI8 src) %{
6470  match(Set mem (StoreB mem src));
6471
6472  ins_cost(150);
6473  format %{ "MOV8   $mem,$src" %}
6474  opcode(0xC6);               /* C6 /0 */
6475  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6476  ins_pipe( ialu_mem_imm );
6477%}
6478
6479// Store CMS card-mark Immediate
// storeImmCM: card-mark store. Matches the ideal StoreCM node with an 8-bit
// immediate; opcode and encoder list are the same as storeImmB, only the
// matched node and the format annotation differ.
6480instruct storeImmCM(memory mem, immI8 src) %{
6481  match(Set mem (StoreCM mem src));
6482
6483  ins_cost(150);
6484  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6485  opcode(0xC6);               /* C6 /0 */
6486  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6487  ins_pipe( ialu_mem_imm );
6488%}
6489
6490// Store Double
// storeDPR: StoreD from the regDPR1 operand class, used when UseSSE<=1.
// Prints as FST_D; opcode byte 0xDD is passed to enc_FPR_store (defined
// elsewhere in this file) together with the memory and source operands.
6491instruct storeDPR( memory mem, regDPR1 src) %{
6492  predicate(UseSSE<=1);
6493  match(Set mem (StoreD mem src));
6494
6495  ins_cost(100);
6496  format %{ "FST_D  $mem,$src" %}
6497  opcode(0xDD);       /* DD /2 */
6498  ins_encode( enc_FPR_store(mem,src) );
6499  ins_pipe( fpu_mem_reg );
6500%}
6501
6502// Store double does rounding on x86
// storeDPR_rounded: matches StoreD of (RoundDouble src), so the RoundDouble
// node is absorbed into the store. Predicate, cost, opcode and encoder are
// the same as storeDPR; only the match rule and format annotation differ.
6503instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6504  predicate(UseSSE<=1);
6505  match(Set mem (StoreD mem (RoundDouble src)));
6506
6507  ins_cost(100);
6508  format %{ "FST_D  $mem,$src\t# round" %}
6509  opcode(0xDD);       /* DD /2 */
6510  ins_encode( enc_FPR_store(mem,src) );
6511  ins_pipe( fpu_mem_reg );
6512%}
6513
6514// Store XMM register to memory (double-precision floating points)
6515// MOVSD instruction
// storeD: StoreD from an XMM register (regD), available when UseSSE>=2.
// Emitted through the assembler's movdbl(Address, XMMRegister).
6516instruct storeD(memory mem, regD src) %{
6517  predicate(UseSSE>=2);
6518  match(Set mem (StoreD mem src));
6519  ins_cost(95);
6520  format %{ "MOVSD  $mem,$src" %}
6521  ins_encode %{
6522    __ movdbl($mem$$Address, $src$$XMMRegister);
6523  %}
6524  ins_pipe( pipe_slow );
6525%}
6526
6527// Store XMM register to memory (single-precision floating point)
6528// MOVSS instruction
// storeF: StoreF from an XMM register (regF), available when UseSSE>=1.
// Emitted through the assembler's movflt(Address, XMMRegister).
6529instruct storeF(memory mem, regF src) %{
6530  predicate(UseSSE>=1);
6531  match(Set mem (StoreF mem src));
6532  ins_cost(95);
6533  format %{ "MOVSS  $mem,$src" %}
6534  ins_encode %{
6535    __ movflt($mem$$Address, $src$$XMMRegister);
6536  %}
6537  ins_pipe( pipe_slow );
6538%}
6539
6540// Store Float
// storeFPR: StoreF from the regFPR1 operand class, used only when
// UseSSE==0. Prints as FST_S; opcode byte 0xD9 goes to the shared
// enc_FPR_store encoder.
6541instruct storeFPR( memory mem, regFPR1 src) %{
6542  predicate(UseSSE==0);
6543  match(Set mem (StoreF mem src));
6544
6545  ins_cost(100);
6546  format %{ "FST_S  $mem,$src" %}
6547  opcode(0xD9);       /* D9 /2 */
6548  ins_encode( enc_FPR_store(mem,src) );
6549  ins_pipe( fpu_mem_reg );
6550%}
6551
6552// Store Float does rounding on x86
// storeFPR_rounded: matches StoreF of (RoundFloat src), absorbing the
// RoundFloat node into the store. Same predicate, cost, opcode and encoder
// as storeFPR.
6553instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6554  predicate(UseSSE==0);
6555  match(Set mem (StoreF mem (RoundFloat src)));
6556
6557  ins_cost(100);
6558  format %{ "FST_S  $mem,$src\t# round" %}
6559  opcode(0xD9);       /* D9 /2 */
6560  ins_encode( enc_FPR_store(mem,src) );
6561  ins_pipe( fpu_mem_reg );
6562%}
6563
6564// Store Float from a double register: the single-precision store does the D2F rounding on x86
// storeFPR_Drounded: matches StoreF of (ConvD2F src) where src is in the
// double class regDPR1, so the double->float conversion is absorbed into the
// single-precision store (FST_S, opcode byte 0xD9). Note the predicate is
// UseSSE<=1 here, whereas the other FST_S rules require UseSSE==0.
6565instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6566  predicate(UseSSE<=1);
6567  match(Set mem (StoreF mem (ConvD2F src)));
6568
6569  ins_cost(100);
6570  format %{ "FST_S  $mem,$src\t# D-round" %}
6571  opcode(0xD9);       /* D9 /2 */
6572  ins_encode( enc_FPR_store(mem,src) );
6573  ins_pipe( fpu_mem_reg );
6574%}
6575
6576// Store immediate Float value (it is faster than store from FPU register)
6577// The instruction usage is guarded by predicate in operand immFPR().
// storeFPR_imm: StoreF of a float constant (immFPR operand). The constant is
// written with the integer immediate-store opcode 0xC7, its bits supplied by
// Con32FPR_as_bits. Cost 50 is lower than the register-based float stores.
// This rule carries no predicate of its own.
6578instruct storeFPR_imm( memory mem, immFPR src) %{
6579  match(Set mem (StoreF mem src));
6580
6581  ins_cost(50);
6582  format %{ "MOV    $mem,$src\t# store float" %}
6583  opcode(0xC7);               /* C7 /0 */
6584  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6585  ins_pipe( ialu_mem_imm );
6586%}
6587
6588// Store immediate Float value (it is faster than store from XMM register)
6589// The instruction usage is guarded by predicate in operand immF().
// storeF_imm: StoreF of a float constant (immF operand). Same shape as
// storeFPR_imm: opcode 0xC7 with the constant's bits from Con32F_as_bits,
// cost 50, and no predicate of its own.
6590instruct storeF_imm( memory mem, immF src) %{
6591  match(Set mem (StoreF mem src));
6592
6593  ins_cost(50);
6594  format %{ "MOV    $mem,$src\t# store float" %}
6595  opcode(0xC7);               /* C7 /0 */
6596  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6597  ins_pipe( ialu_mem_imm );
6598%}
6599
6600// Store Integer to stack slot
// storeSSI: moves an integer register into an int stack slot. The match
// rule is a bare (Set dst src) with no ideal store node, i.e. a copy between
// operand classes. Encoded by OpcPRegSS with opcode byte 0x89.
6601instruct storeSSI(stackSlotI dst, rRegI src) %{
6602  match(Set dst src);
6603
6604  ins_cost(100);
6605  format %{ "MOV    $dst,$src" %}
6606  opcode(0x89);
6607  ins_encode( OpcPRegSS( dst, src ) );
6608  ins_pipe( ialu_mem_reg );
6609%}
6610
6611// Store Pointer to stack slot
// storeSSP: moves a pointer register (eRegP) into a pointer stack slot
// (stackSlotP). Same cost, opcode byte and encoder as the integer stack-slot
// store; only the operand classes differ.
6612instruct storeSSP(stackSlotP dst, eRegP src) %{
6613  match(Set dst src);
6614
6615  ins_cost(100);
6616  format %{ "MOV    $dst,$src" %}
6617  opcode(0x89);
6618  ins_encode( OpcPRegSS( dst, src ) );
6619  ins_pipe( ialu_mem_reg );
6620%}
6621
6622// Store Long to stack slot
// storeSSL: moves a long register pair into a long stack slot as two 32-bit
// MOVs (low word to dst, high word to dst+4 per the format). The encoder
// list mirrors the non-atomic long store: OpcP + RegMem, then OpcS +
// RegMem_Hi, both with byte 0x89.
6623instruct storeSSL(stackSlotL dst, eRegL src) %{
6624  match(Set dst src);
6625
6626  ins_cost(200);
6627  format %{ "MOV    $dst,$src.lo\n\t"
6628            "MOV    $dst+4,$src.hi" %}
6629  opcode(0x89, 0x89);
6630  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6631  ins_pipe( ialu_mem_long_reg );
6632%}
6633
6634//----------MemBar Instructions-----------------------------------------------
6635// Memory barrier flavors
6636
// membar_acquire: matches both MemBarAcquire and LoadFence. Declared with
// size(0) and an empty ins_encode, so no machine code is emitted.
6637instruct membar_acquire() %{
6638  match(MemBarAcquire);
6639  match(LoadFence);
6640  ins_cost(400);
6641
6642  size(0);
6643  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6644  ins_encode();
6645  ins_pipe(empty);
6646%}
6647
// membar_acquire_lock: matches MemBarAcquireLock. Zero cost, zero size and
// an empty encoding; the format text gives the reason (a CMPXCHG in the
// preceding FastLock).
6648instruct membar_acquire_lock() %{
6649  match(MemBarAcquireLock);
6650  ins_cost(0);
6651
6652  size(0);
6653  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6654  ins_encode( );
6655  ins_pipe(empty);
6656%}
6657
// membar_release: matches both MemBarRelease and StoreFence. Declared with
// size(0) and an empty ins_encode, so no machine code is emitted.
6658instruct membar_release() %{
6659  match(MemBarRelease);
6660  match(StoreFence);
6661  ins_cost(400);
6662
6663  size(0);
6664  format %{ "MEMBAR-release ! (empty encoding)" %}
6665  ins_encode( );
6666  ins_pipe(empty);
6667%}
6668
// membar_release_lock: matches MemBarReleaseLock. Zero cost, zero size and
// an empty encoding; the format text gives the reason (a FastUnlock
// follows).
6669instruct membar_release_lock() %{
6670  match(MemBarReleaseLock);
6671  ins_cost(0);
6672
6673  size(0);
6674  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6675  ins_encode( );
6676  ins_pipe(empty);
6677%}
6678
// membar_volatile: matches MemBarVolatile and kills the flags register.
// The format template prints a LOCK ADDL on multiprocessor systems and an
// "empty encoding" note otherwise. The encoder itself unconditionally calls
// membar(Assembler::StoreLoad).
// NOTE(review): presumably the assembler's membar performs the matching
// os::is_MP() check internally -- confirm, since that is not visible here.
6679instruct membar_volatile(eFlagsReg cr) %{
6680  match(MemBarVolatile);
6681  effect(KILL cr);
6682  ins_cost(400);
6683
6684  format %{
6685    $$template
6686    if (os::is_MP()) {
6687      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6688    } else {
6689      $$emit$$"MEMBAR-volatile ! (empty encoding)"
6690    }
6691  %}
6692  ins_encode %{
6693    __ membar(Assembler::StoreLoad);
6694  %}
6695  ins_pipe(pipe_slow);
6696%}
6697
// unnecessary_membar_volatile: alternative rule for MemBarVolatile that
// emits nothing (size 0, empty encoding). It applies only when
// Matcher::post_store_load_barrier(n) holds; its cost of 0 (versus 400 for
// the emitting rule) presumably makes the matcher prefer it in that case.
6698instruct unnecessary_membar_volatile() %{
6699  match(MemBarVolatile);
6700  predicate(Matcher::post_store_load_barrier(n));
6701  ins_cost(0);
6702
6703  size(0);
6704  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6705  ins_encode( );
6706  ins_pipe(empty);
6707%}
6708
// membar_storestore: matches MemBarStoreStore. Zero cost, zero size and an
// empty encoding, so no machine code is emitted.
6709instruct membar_storestore() %{
6710  match(MemBarStoreStore);
6711  ins_cost(0);
6712
6713  size(0);
6714  format %{ "MEMBAR-storestore (empty encoding)" %}
6715  ins_encode( );
6716  ins_pipe(empty);
6717%}
6718
6719//----------Move Instructions--------------------------------------------------
// castX2P: matches CastX2P (integer reinterpreted as pointer) with an empty
// encoding and zero cost. Both operands use eAX* register classes, so
// presumably source and destination are the same physical register and no
// move is needed -- confirm against the eAXRegP / eAXRegI operand definitions.
6720instruct castX2P(eAXRegP dst, eAXRegI src) %{
6721  match(Set dst (CastX2P src));
6722  format %{ "# X2P  $dst, $src" %}
6723  ins_encode( /*empty encoding*/ );
6724  ins_cost(0);
6725  ins_pipe(empty);
6726%}
6727
// castP2X: matches CastP2X (pointer reinterpreted as integer). Unlike the
// reverse cast, the operands are general classes (rRegI, eRegP), so a
// register copy is encoded through enc_Copy(dst, src).
6728instruct castP2X(rRegI dst, eRegP src ) %{
6729  match(Set dst (CastP2X src));
6730  ins_cost(50);
6731  format %{ "MOV    $dst, $src\t# CastP2X" %}
6732  ins_encode( enc_Copy( dst, src) );
6733  ins_pipe( ialu_reg_reg );
6734%}
6735
6736//----------Conditional Move---------------------------------------------------
6737// Conditional move
// jmovI_reg: integer conditional move for CPUs without CMOV support
// (predicate is !supports_cmov()), signed-compare flavor (cmpOp / eFlagsReg).
// Emulated as a short conditional branch over a plain register move. dst is
// both an input and the output of the CMoveI match rule.
6738instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6739  predicate(!VM_Version::supports_cmov() );
6740  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6741  ins_cost(200);
6742  format %{ "J$cop,us skip\t# signed cmove\n\t"
6743            "MOV    $dst,$src\n"
6744      "skip:" %}
6745  ins_encode %{
6746    Label Lskip;
6747    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test, so the
    //  move is skipped exactly when the CMOV condition does not hold)
6748    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6749    __ movl($dst$$Register, $src$$Register);
6750    __ bind(Lskip);
6751  %}
6752  ins_pipe( pipe_cmov_reg );
6753%}
6754
// jmovI_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the
// branch-emulated integer conditional move for CPUs without CMOV support.
// The emitted sequence is the same: inverted short branch, move, label.
6755instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6756  predicate(!VM_Version::supports_cmov() );
6757  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6758  ins_cost(200);
6759  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6760            "MOV    $dst,$src\n"
6761      "skip:" %}
6762  ins_encode %{
6763    Label Lskip;
6764    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test)
6765    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6766    __ movl($dst$$Register, $src$$Register);
6767    __ bind(Lskip);
6768  %}
6769  ins_pipe( pipe_cmov_reg );
6770%}
6771
// cmovI_reg: register-to-register integer CMOV for signed compares, used
// when the CPU supports CMOV. opcode(0x0F,0x40) supplies the two opcode
// bytes that enc_cmov(cop) combines with the condition; RegReg(dst, src)
// encodes the operands.
6772instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6773  predicate(VM_Version::supports_cmov() );
6774  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6775  ins_cost(200);
6776  format %{ "CMOV$cop $dst,$src" %}
6777  opcode(0x0F,0x40);
6778  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6779  ins_pipe( pipe_cmov_reg );
6780%}
6781
// cmovI_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the
// register-to-register integer CMOV. Opcode bytes and encoder list are the
// same as the signed rule. Also the expansion target of cmovI_regUCF.
6782instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6783  predicate(VM_Version::supports_cmov() );
6784  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6785  ins_cost(200);
6786  format %{ "CMOV$cop $dst,$src" %}
6787  opcode(0x0F,0x40);
6788  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6789  ins_pipe( pipe_cmov_reg );
6790%}
6791
// cmovI_regUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes.
// It has no encoding of its own; the expand block rewrites it into
// cmovI_regU with the same operands.
6792instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6793  predicate(VM_Version::supports_cmov() );
6794  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6795  ins_cost(200);
6796  expand %{
6797    cmovI_regU(cop, cr, dst, src);
6798  %}
6799%}
6800
6801// Conditional move
// cmovI_mem: integer CMOV (signed compare) whose source is a memory
// operand; the match rule folds the LoadI into the instruction. Cost 250,
// versus 200 for the register form; operands are encoded with RegMem.
6802instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6803  predicate(VM_Version::supports_cmov() );
6804  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6805  ins_cost(250);
6806  format %{ "CMOV$cop $dst,$src" %}
6807  opcode(0x0F,0x40);
6808  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6809  ins_pipe( pipe_cmov_mem );
6810%}
6811
6812// Conditional move
// cmovI_memU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the integer
// CMOV with a folded LoadI source. Also the expansion target of
// cmovI_memUCF.
6813instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6814  predicate(VM_Version::supports_cmov() );
6815  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6816  ins_cost(250);
6817  format %{ "CMOV$cop $dst,$src" %}
6818  opcode(0x0F,0x40);
6819  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6820  ins_pipe( pipe_cmov_mem );
6821%}
6822
// cmovI_memUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes
// with a folded LoadI source. No encoding of its own; expands into
// cmovI_memU with the same operands.
6823instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6824  predicate(VM_Version::supports_cmov() );
6825  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6826  ins_cost(250);
6827  expand %{
6828    cmovI_memU(cop, cr, dst, src);
6829  %}
6830%}
6831
6832// Conditional move
// cmovP_reg: pointer conditional move (CMoveP) for signed compares, used
// when the CPU supports CMOV. Same opcode bytes and encoder list as the
// integer register form, with eRegP operands.
6833instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6834  predicate(VM_Version::supports_cmov() );
6835  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6836  ins_cost(200);
6837  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6838  opcode(0x0F,0x40);
6839  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6840  ins_pipe( pipe_cmov_reg );
6841%}
6842
6843// Conditional move (non-P6 version)
6844// Note:  a CMoveP is generated for  stubs and native wrappers
6845//        regardless of whether we are on a P6, so we
6846//        emulate a cmov here
// cmovP_reg_nonP6: branch-emulated pointer conditional move. It has no
// predicate, so it is available regardless of CMOV support; its cost of 300
// is higher than the 200 of the true CMOV rule. The sequence is
// enc_cmov_branch(cop, 0x2) followed by opcode 0x8b with RegReg(dst, src).
// NOTE(review): the 0x2 argument presumably is the byte length of the MOV
// being jumped over -- confirm in the enc_cmov_branch enc_class.
6847instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6848  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6849  ins_cost(300);
6850  format %{ "Jn$cop   skip\n\t"
6851          "MOV    $dst,$src\t# pointer\n"
6852      "skip:" %}
6853  opcode(0x8b);
6854  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6855  ins_pipe( pipe_cmov_reg );
6856%}
6857
6858// Conditional move
// cmovP_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the pointer
// CMOV. Also the expansion target of cmovP_regUCF.
6859instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6860  predicate(VM_Version::supports_cmov() );
6861  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6862  ins_cost(200);
6863  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6864  opcode(0x0F,0x40);
6865  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6866  ins_pipe( pipe_cmov_reg );
6867%}
6868
// cmovP_regUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes.
// No encoding of its own; expands into cmovP_regU with the same operands.
6869instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6870  predicate(VM_Version::supports_cmov() );
6871  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6872  ins_cost(200);
6873  expand %{
6874    cmovP_regU(cop, cr, dst, src);
6875  %}
6876%}
6877
6878// DISABLED: Requires the ADLC to emit a bottom_type call that
6879// correctly meets the two pointer arguments; one is an incoming
6880// register but the other is a memory operand.  ALSO appears to
6881// be buggy with implicit null checks.
6882//
6883//// Conditional move
6884//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6885//  predicate(VM_Version::supports_cmov() );
6886//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6887//  ins_cost(250);
6888//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6889//  opcode(0x0F,0x40);
6890//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6891//  ins_pipe( pipe_cmov_mem );
6892//%}
6893//
6894//// Conditional move
6895//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6896//  predicate(VM_Version::supports_cmov() );
6897//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6898//  ins_cost(250);
6899//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6900//  opcode(0x0F,0x40);
6901//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6902//  ins_pipe( pipe_cmov_mem );
6903//%}
6904
6905// Conditional move
// fcmovDPR_regU: double conditional move (CMoveD) for UseSSE<=1, using the
// cmpOp_fcmov condition class with unsigned flags. The destination is the
// regDPR1 class; opcode byte 0xDA is passed to enc_cmov_dpr(cop, src).
6906instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6907  predicate(UseSSE<=1);
6908  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6909  ins_cost(200);
6910  format %{ "FCMOV$cop $dst,$src\t# double" %}
6911  opcode(0xDA);
6912  ins_encode( enc_cmov_dpr(cop,src) );
6913  ins_pipe( pipe_cmovDPR_reg );
6914%}
6915
6916// Conditional move
// fcmovFPR_regU: float conditional move (CMoveF) for UseSSE==0, using the
// cmpOp_fcmov condition class with unsigned flags. It reuses the same
// encoder (enc_cmov_dpr), opcode byte (0xDA) and pipeline class
// (pipe_cmovDPR_reg) as the double rule.
6917instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6918  predicate(UseSSE==0);
6919  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6920  ins_cost(200);
6921  format %{ "FCMOV$cop $dst,$src\t# float" %}
6922  opcode(0xDA);
6923  ins_encode( enc_cmov_dpr(cop,src) );
6924  ins_pipe( pipe_cmovDPR_reg );
6925%}
6926
6927// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// fcmovDPR_regS: double conditional move for signed compares (cmpOp /
// eFlagsReg) when UseSSE<=1. Emulated with a branch: enc_cmov_branch skips
// the move, which is encoded as Push_Reg_DPR(src) followed by opcode 0xdd
// with RegOpc(dst).
// NOTE(review): the 0x4 argument presumably is the byte length of the
// skipped sequence -- confirm in the enc_cmov_branch enc_class.
6928instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6929  predicate(UseSSE<=1);
6930  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6931  ins_cost(200);
6932  format %{ "Jn$cop   skip\n\t"
6933            "MOV    $dst,$src\t# double\n"
6934      "skip:" %}
6935  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6936  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6937  ins_pipe( pipe_cmovDPR_reg );
6938%}
6939
6940// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// fcmovFPR_regS: float conditional move for signed compares (cmpOp /
// eFlagsReg) when UseSSE==0. Same branch-emulated shape as the double rule,
// using Push_Reg_FPR(src) for the source; opcode bytes and the pipeline
// class (pipe_cmovDPR_reg) are shared with it.
// NOTE(review): the 0x4 argument presumably is the byte length of the
// skipped sequence -- confirm in the enc_cmov_branch enc_class.
6941instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6942  predicate(UseSSE==0);
6943  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6944  ins_cost(200);
6945  format %{ "Jn$cop    skip\n\t"
6946            "MOV    $dst,$src\t# float\n"
6947      "skip:" %}
6948  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6949  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6950  ins_pipe( pipe_cmovDPR_reg );
6951%}
6952
6953// No CMOVE with SSE/SSE2
// fcmovF_regS: float conditional move between XMM registers for signed
// compares, available when UseSSE>=1. Implemented as a short branch with the
// inverted condition around a movflt.
6954instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6955  predicate (UseSSE>=1);
6956  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6957  ins_cost(200);
6958  format %{ "Jn$cop   skip\n\t"
6959            "MOVSS  $dst,$src\t# float\n"
6960      "skip:" %}
6961  ins_encode %{
6962    Label skip;
6963    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test)
6964    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6965    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6966    __ bind(skip);
6967  %}
6968  ins_pipe( pipe_slow );
6969%}
6970
6971// No CMOVE with SSE/SSE2
// fcmovD_regS: double conditional move between XMM registers for signed
// compares, available when UseSSE>=2. Implemented as a short branch with the
// inverted condition around a movdbl.
// The format annotation says "# double": this rule matches CMoveD and moves
// regD operands with MOVSD.
6972instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6973  predicate (UseSSE>=2);
6974  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6975  ins_cost(200);
6976  format %{ "Jn$cop   skip\n\t"
6977            "MOVSD  $dst,$src\t# double\n"
6978      "skip:" %}
6979  ins_encode %{
6980    Label skip;
6981    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test)
6982    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6983    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6984    __ bind(skip);
6985  %}
6986  ins_pipe( pipe_slow );
6987%}
6988
6989// unsigned version
// fcmovF_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the XMM
// float conditional move, available when UseSSE>=1. Same inverted-branch
// plus movflt sequence. Also the expansion target of fcmovF_regUCF.
6990instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6991  predicate (UseSSE>=1);
6992  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6993  ins_cost(200);
6994  format %{ "Jn$cop   skip\n\t"
6995            "MOVSS  $dst,$src\t# float\n"
6996      "skip:" %}
6997  ins_encode %{
6998    Label skip;
6999    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test)
7000    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7001    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7002    __ bind(skip);
7003  %}
7004  ins_pipe( pipe_slow );
7005%}
7006
// fcmovF_regUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes.
// No encoding of its own; expands into fcmovF_regU with the same operands.
7007instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7008  predicate (UseSSE>=1);
7009  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7010  ins_cost(200);
7011  expand %{
7012    fcmovF_regU(cop, cr, dst, src);
7013  %}
7014%}
7015
7016// unsigned version
// fcmovD_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the XMM
// double conditional move, available when UseSSE>=2. Same inverted-branch
// plus movdbl sequence. Also the expansion target of fcmovD_regUCF.
// The format annotation says "# double": this rule matches CMoveD and moves
// regD operands with MOVSD.
7017instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7018  predicate (UseSSE>=2);
7019  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7020  ins_cost(200);
7021  format %{ "Jn$cop   skip\n\t"
7022            "MOVSD  $dst,$src\t# double\n"
7023      "skip:" %}
7024  ins_encode %{
7025    Label skip;
7026    // Invert sense of branch from sense of CMOV
    // (the condition code is XORed with 1 to obtain the opposite test)
7027    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7028    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7029    __ bind(skip);
7030  %}
7031  ins_pipe( pipe_slow );
7032%}
7033
// fcmovD_regUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes.
// No encoding of its own; expands into fcmovD_regU with the same operands.
7034instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7035  predicate (UseSSE>=2);
7036  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7037  ins_cost(200);
7038  expand %{
7039    fcmovD_regU(cop, cr, dst, src);
7040  %}
7041%}
7042
// cmovL_reg: long conditional move (CMoveL) for signed compares, used when
// the CPU supports CMOV. Emitted as two CMOVs with the same condition: one
// for the low halves (RegReg_Lo2) and one for the high halves (RegReg_Hi2).
7043instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7044  predicate(VM_Version::supports_cmov() );
7045  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7046  ins_cost(200);
7047  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7048            "CMOV$cop $dst.hi,$src.hi" %}
7049  opcode(0x0F,0x40);
7050  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7051  ins_pipe( pipe_cmov_reg_long );
7052%}
7053
// cmovL_regU: unsigned-compare flavor (cmpOpU / eFlagsRegU) of the long
// conditional move; same pair of CMOVs over the low and high halves. Also
// the expansion target of cmovL_regUCF.
7054instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7055  predicate(VM_Version::supports_cmov() );
7056  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7057  ins_cost(200);
7058  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7059            "CMOV$cop $dst.hi,$src.hi" %}
7060  opcode(0x0F,0x40);
7061  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7062  ins_pipe( pipe_cmov_reg_long );
7063%}
7064
// cmovL_regUCF: variant for the cmpOpUCF / eFlagsRegUCF operand classes.
// No encoding of its own; expands into cmovL_regU with the same operands.
7065instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7066  predicate(VM_Version::supports_cmov() );
7067  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7068  ins_cost(200);
7069  expand %{
7070    cmovL_regU(cop, cr, dst, src);
7071  %}
7072%}
7073
7074//----------Arithmetic Instructions--------------------------------------------
7075//----------Addition Instructions----------------------------------------------
7076
7077// Integer Addition Instructions
// addI_eReg: two-operand integer add, dst = dst + src, both in registers.
// Declares the flags register killed and a fixed size of 2 bytes (opcode
// 0x03 plus the RegReg operand byte).
7078instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7079  match(Set dst (AddI dst src));
7080  effect(KILL cr);
7081
7082  size(2);
7083  format %{ "ADD    $dst,$src" %}
7084  opcode(0x03);
7085  ins_encode( OpcP, RegReg( dst, src) );
7086  ins_pipe( ialu_reg_reg );
7087%}
7088
// addI_eReg_imm: integer add of an immediate into a register, dst = dst +
// src. No size() is declared; the constant is emitted through Con8or32 and
// the opcode through OpcSErm(dst, src), which presumably select a short or
// long immediate form based on the value -- confirm in those enc_classes.
7089instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7090  match(Set dst (AddI dst src));
7091  effect(KILL cr);
7092
7093  format %{ "ADD    $dst,$src" %}
7094  opcode(0x81, 0x00); /* /0 id */
7095  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7096  ins_pipe( ialu_reg );
7097%}
7098
// incI_eReg: add of the constant operand class immI1 expressed as INC,
// enabled only when the UseIncDec flag is set. Declared as a 1-byte
// instruction: Opc_plus(primary, dst) combines base opcode 0x40 with the
// destination register.
7099instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7100  predicate(UseIncDec);
7101  match(Set dst (AddI dst src));
7102  effect(KILL cr);
7103
7104  size(1);
7105  format %{ "INC    $dst" %}
7106  opcode(0x40); /*  */
7107  ins_encode( Opc_plus( primary, dst ) );
7108  ins_pipe( ialu_reg );
7109%}
7110
// leaI_eReg_immI: three-operand integer add, dst = src0 + immediate,
// expressed as LEA. Unlike the ADD rules, the destination is not one of the
// inputs and no flags kill is declared. Cost 110.
7111instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7112  match(Set dst (AddI src0 src1));
7113  ins_cost(110);
7114
7115  format %{ "LEA    $dst,[$src0 + $src1]" %}
7116  opcode(0x8D); /* 0x8D /r */
7117  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7118  ins_pipe( ialu_reg_reg );
7119%}
7120
// leaP_eReg_immI: pointer counterpart of the LEA add: matches AddP of a
// pointer register and an immediate, dst = src0 + src1. Same opcode byte
// (0x8D) and encoder list; no flags kill is declared.
7121instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7122  match(Set dst (AddP src0 src1));
7123  ins_cost(110);
7124
7125  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7126  opcode(0x8D); /* 0x8D /r */
7127  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7128  ins_pipe( ialu_reg_reg );
7129%}
7130
// decI_eReg: add of the constant operand class immI_M1 expressed as DEC,
// enabled only when the UseIncDec flag is set. Declared as a 1-byte
// instruction: Opc_plus(primary, dst) combines base opcode 0x48 with the
// destination register.
7131instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7132  predicate(UseIncDec);
7133  match(Set dst (AddI dst src));
7134  effect(KILL cr);
7135
7136  size(1);
7137  format %{ "DEC    $dst" %}
7138  opcode(0x48); /*  */
7139  ins_encode( Opc_plus( primary, dst ) );
7140  ins_pipe( ialu_reg );
7141%}
7142
// addP_eReg: pointer add, dst = dst + src, where src is an integer
// register. Same opcode byte (0x03), size (2) and encoder list as the
// integer register add; kills the flags register.
7143instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7144  match(Set dst (AddP dst src));
7145  effect(KILL cr);
7146
7147  size(2);
7148  format %{ "ADD    $dst,$src" %}
7149  opcode(0x03);
7150  ins_encode( OpcP, RegReg( dst, src) );
7151  ins_pipe( ialu_reg_reg );
7152%}
7153
// addP_eReg_imm: pointer add of an integer immediate, dst = dst + src.
// Uses the same opcode pair and encoder list as the integer immediate add.
// The commented-out ins_encode line below is an older alternative that is
// kept for reference only.
7154instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7155  match(Set dst (AddP dst src));
7156  effect(KILL cr);
7157
7158  format %{ "ADD    $dst,$src" %}
7159  opcode(0x81,0x00); /* Opcode 81 /0 id */
7160  // ins_encode( RegImm( dst, src) );
7161  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7162  ins_pipe( ialu_reg );
7163%}
7164
// addI_eReg_mem: integer add with a memory source; the match rule folds the
// LoadI into the ADD (dst = dst + [src]). Opcode byte 0x03 with RegMem
// operands, cost 125, flags killed.
7165instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7166  match(Set dst (AddI dst (LoadI src)));
7167  effect(KILL cr);
7168
7169  ins_cost(125);
7170  format %{ "ADD    $dst,$src" %}
7171  opcode(0x03);
7172  ins_encode( OpcP, RegMem( dst, src) );
7173  ins_pipe( ialu_reg_mem );
7174%}
7175
// addI_mem_eReg: read-modify-write add to memory. Matches the pattern
// StoreI dst (AddI (LoadI dst) src), i.e. load, add and store on the same
// memory operand, and emits one ADD with a memory destination (opcode byte
// 0x01). Cost 150, flags killed.
7176instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7177  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7178  effect(KILL cr);
7179
7180  ins_cost(150);
7181  format %{ "ADD    $dst,$src" %}
7182  opcode(0x01);  /* Opcode 01 /r */
7183  ins_encode( OpcP, RegMem( src, dst ) );
7184  ins_pipe( ialu_mem_reg );
7185%}
7186
7187// Add Memory with Immediate
// addI_mem_imm: read-modify-write add of an immediate to memory. Matches
// StoreI dst (AddI (LoadI dst) src) with an immI source. Encoded as
// OpcSE(src), RMopc_Mem(0x00, dst) and the constant via Con8or32.
7188instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7189  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7190  effect(KILL cr);
7191
7192  ins_cost(125);
7193  format %{ "ADD    $dst,$src" %}
7194  opcode(0x81);               /* Opcode 81 /0 id */
7195  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7196  ins_pipe( ialu_mem_imm );
7197%}
7198
// incI_mem: read-modify-write increment of a memory word. Matches the
// memory add pattern with an immI1 source and emits INC (opcode 0xFF with
// RMopc_Mem(0x00, dst)). Note that, unlike the register INC rule, this one
// has no UseIncDec predicate.
7199instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7200  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7201  effect(KILL cr);
7202
7203  ins_cost(125);
7204  format %{ "INC    $dst" %}
7205  opcode(0xFF);               /* Opcode FF /0 */
7206  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7207  ins_pipe( ialu_mem_imm );
7208%}
7209
// decI_mem: read-modify-write decrement of a memory word. Matches the
// memory add pattern with an immI_M1 source and emits DEC (opcode 0xFF with
// RMopc_Mem(0x01, dst)). Note that, unlike the register DEC rule, this one
// has no UseIncDec predicate.
7210instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7211  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7212  effect(KILL cr);
7213
7214  ins_cost(125);
7215  format %{ "DEC    $dst" %}
7216  opcode(0xFF);               /* Opcode FF /1 */
7217  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7218  ins_pipe( ialu_mem_imm );
7219%}
7220
7221
// checkCastPP: matches CheckCastPP with the same register as input and
// output. Declared size 0 with an empty encoding, so no machine code is
// emitted for the node.
7222instruct checkCastPP( eRegP dst ) %{
7223  match(Set dst (CheckCastPP dst));
7224
7225  size(0);
7226  format %{ "#checkcastPP of $dst" %}
7227  ins_encode( /*empty encoding*/ );
7228  ins_pipe( empty );
7229%}
7230
// castPP: matches CastPP with the same register as input and output and an
// empty encoding. Unlike checkCastPP it declares no size(), and unlike
// castII it declares no ins_cost.
7231instruct castPP( eRegP dst ) %{
7232  match(Set dst (CastPP dst));
7233  format %{ "#castPP of $dst" %}
7234  ins_encode( /*empty encoding*/ );
7235  ins_pipe( empty );
7236%}
7237
// castII: matches CastII with the same integer register as input and
// output. Empty encoding and an explicit cost of 0.
7238instruct castII( rRegI dst ) %{
7239  match(Set dst (CastII dst));
7240  format %{ "#castII of $dst" %}
7241  ins_encode( /*empty encoding*/ );
7242  ins_cost(0);
7243  ins_pipe( empty );
7244%}
7245
7246
7247// Load-locked - same as a regular pointer load when used with compare-swap
// loadPLocked: matches the ideal LoadPLocked node and emits an ordinary
// pointer load: opcode byte 0x8B with RegMem(dst, mem). Nothing in the
// encoding distinguishes it from a plain MOV load.
7248instruct loadPLocked(eRegP dst, memory mem) %{
7249  match(Set dst (LoadPLocked mem));
7250
7251  ins_cost(125);
7252  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7253  opcode(0x8B);
7254  ins_encode( OpcP, RegMem(dst,mem));
7255  ins_pipe( ialu_reg_mem );
7256%}
7257
7258// Conditional-store of the updated heap-top.
7259// Used during allocation of the shared heap.
7260// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
// storePConditional: matches StorePConditional and produces its result in
// the flags register (the match rule is "Set cr"). The expected old value
// must be in the eAXRegP class. Encoded as lock_prefix, the two opcode bytes
// 0x0F 0xB1, then RegMem(newval, heap_top_ptr). No KILL effect is declared
// for oldval; the comment inside the body explains why.
7261instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7262  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7263  // EAX is killed if there is contention, but then it's also unused.
7264  // In the common case of no contention, EAX holds the new oop address.
7265  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7266  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7267  ins_pipe( pipe_cmpxchg );
7268%}
7269
7270// Conditional-store of an int value.
7271// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
// storeIConditional: matches StoreIConditional and produces its result in
// the flags register. The expected old value must be in the eAXRegI class
// and is declared killed. Same encoding shape as the pointer rule:
// lock_prefix, 0x0F 0xB1, RegMem(newval, mem).
7272instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7273  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7274  effect(KILL oldval);
7275  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7276  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7277  ins_pipe( pipe_cmpxchg );
7278%}
7279
7280// Conditional-store of a long value.
7281// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
// storeLConditional: matches StoreLConditional and produces its result in
// the flags register. The old value uses the eADXRegL class and is declared
// killed; the new value uses eBCXRegL. The encoder swaps EBX and ECX, issues
// cmpxchg8 on the memory operand (with a lock prefix on multiprocessor
// systems), then swaps the two registers back.
7282instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7283  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7284  effect(KILL oldval);
7285  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7286            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7287            "XCHG   EBX,ECX"
7288  %}
7289  ins_encode %{
7290    // Note: we need to swap rbx, and rcx before and after the
7291    //       cmpxchg8 instruction because the instruction uses
7292    //       rcx as the high order word of the new value to store but
7293    //       our register encoding uses rbx.
7294    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    // The lock prefix is only emitted on multiprocessor systems.
7295    if( os::is_MP() )
7296      __ lock();
7297    __ cmpxchg8($mem$$Address);
    // Swap back so both registers hold their original contents again.
7298    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7299  %}
7300  ins_pipe( pipe_cmpxchg );
7301%}
7302
7303// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7304
// compareAndSwapL: 64-bit compare-and-swap producing a 0/1 result in an
// integer register. Available only when the CPU supports cx8. The same rule
// serves both CompareAndSwapL and WeakCompareAndSwapL. The operands are
// pinned to specific register classes (eSIRegP address, eADXRegL old value,
// eBCXRegL new value); the flags and oldval are declared killed.
// Per the format, the result is 0 on failure (JNE taken) and 1 on success.
7305instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7306  predicate(VM_Version::supports_cx8());
7307  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7308  match(Set res (WeakCompareAndSwapL mem_ptr (Binary oldval newval)));
7309  effect(KILL cr, KILL oldval);
7310  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7311            "MOV    $res,0\n\t"
7312            "JNE,s  fail\n\t"
7313            "MOV    $res,1\n"
7314          "fail:" %}
7315  ins_encode( enc_cmpxchg8(mem_ptr),
7316              enc_flags_ne_to_boolean(res) );
7317  ins_pipe( pipe_cmpxchg );
7318%}
7319
// compareAndSwapP: pointer compare-and-swap producing a 0/1 result in an
// integer register. The same rule serves both CompareAndSwapP and
// WeakCompareAndSwapP. The old value uses eAXRegP and the new value
// eCXRegP; the flags and oldval are declared killed. Encoded by enc_cmpxchg
// followed by enc_flags_ne_to_boolean. This rule has no predicate.
7320instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7321  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7322  match(Set res (WeakCompareAndSwapP mem_ptr (Binary oldval newval)));
7323  effect(KILL cr, KILL oldval);
7324  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7325            "MOV    $res,0\n\t"
7326            "JNE,s  fail\n\t"
7327            "MOV    $res,1\n"
7328          "fail:" %}
7329  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7330  ins_pipe( pipe_cmpxchg );
7331%}
7332
// Byte compare-and-swap yielding a boolean in $res; also matches the weak
// variant. Uses the enc_cmpxchgb encoding followed by
// enc_flags_ne_to_boolean. Flags and oldval are declared killed.
7333instruct compareAndSwapB( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7334  match(Set res (CompareAndSwapB mem_ptr (Binary oldval newval)));
7335  match(Set res (WeakCompareAndSwapB mem_ptr (Binary oldval newval)));
7336  effect(KILL cr, KILL oldval);
7337  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7338            "MOV    $res,0\n\t"
7339            "JNE,s  fail\n\t"
7340            "MOV    $res,1\n"
7341          "fail:" %}
7342  ins_encode( enc_cmpxchgb(mem_ptr),
7343              enc_flags_ne_to_boolean(res) );
7344  ins_pipe( pipe_cmpxchg );
7345%}
7346
// Short (16-bit) compare-and-swap yielding a boolean in $res; also matches
// the weak variant. Uses the enc_cmpxchgw encoding followed by
// enc_flags_ne_to_boolean. Flags and oldval are declared killed.
7347instruct compareAndSwapS( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr ) %{
7348  match(Set res (CompareAndSwapS mem_ptr (Binary oldval newval)));
7349  match(Set res (WeakCompareAndSwapS mem_ptr (Binary oldval newval)));
7350  effect(KILL cr, KILL oldval);
7351  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7352            "MOV    $res,0\n\t"
7353            "JNE,s  fail\n\t"
7354            "MOV    $res,1\n"
7355          "fail:" %}
7356  ins_encode( enc_cmpxchgw(mem_ptr),
7357              enc_flags_ne_to_boolean(res) );
7358  ins_pipe( pipe_cmpxchg );
7359%}
7360
// Int compare-and-swap yielding a boolean in $res; also matches the weak
// variant. Shares the enc_cmpxchg encoding with compareAndSwapP. Flags and
// oldval are declared killed.
7361instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7362  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7363  match(Set res (WeakCompareAndSwapI mem_ptr (Binary oldval newval)));
7364  effect(KILL cr, KILL oldval);
7365  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7366            "MOV    $res,0\n\t"
7367            "JNE,s  fail\n\t"
7368            "MOV    $res,1\n"
7369          "fail:" %}
7370  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7371  ins_pipe( pipe_cmpxchg );
7372%}
7373
// 64-bit compare-and-exchange: the result is delivered in the oldval operand
// (eADXRegL) rather than as a boolean. Selected only when
// VM_Version::supports_cx8() holds. Flags are declared killed.
7374instruct compareAndExchangeL( eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7375  predicate(VM_Version::supports_cx8());
7376  match(Set oldval (CompareAndExchangeL mem_ptr (Binary oldval newval)));
7377  effect(KILL cr);
7378  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7379  ins_encode( enc_cmpxchg8(mem_ptr) );
7380  ins_pipe( pipe_cmpxchg );
7381%}
7382
// Pointer compare-and-exchange: the result is delivered in the oldval operand
// (eAXRegP). Flags are declared killed.
7383instruct compareAndExchangeP( pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7384  match(Set oldval (CompareAndExchangeP mem_ptr (Binary oldval newval)));
7385  effect(KILL cr);
7386  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7387  ins_encode( enc_cmpxchg(mem_ptr) );
7388  ins_pipe( pipe_cmpxchg );
7389%}
7390
// Byte compare-and-exchange: the result is delivered in the oldval operand
// (eAXRegI). Encoded with enc_cmpxchgb. Flags are declared killed.
7391instruct compareAndExchangeB( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7392  match(Set oldval (CompareAndExchangeB mem_ptr (Binary oldval newval)));
7393  effect(KILL cr);
7394  format %{ "CMPXCHGB [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7395  ins_encode( enc_cmpxchgb(mem_ptr) );
7396  ins_pipe( pipe_cmpxchg );
7397%}
7398
// Short (16-bit) compare-and-exchange: the result is delivered in the oldval
// operand (eAXRegI). Encoded with enc_cmpxchgw. Flags are declared killed.
7399instruct compareAndExchangeS( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7400  match(Set oldval (CompareAndExchangeS mem_ptr (Binary oldval newval)));
7401  effect(KILL cr);
7402  format %{ "CMPXCHGW [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7403  ins_encode( enc_cmpxchgw(mem_ptr) );
7404  ins_pipe( pipe_cmpxchg );
7405%}
7406
// Int compare-and-exchange: the result is delivered in the oldval operand
// (eAXRegI). Encoded with enc_cmpxchg. Flags are declared killed.
7407instruct compareAndExchangeI( pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7408  match(Set oldval (CompareAndExchangeI mem_ptr (Binary oldval newval)));
7409  effect(KILL cr);
7410  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t" %}
7411  ins_encode( enc_cmpxchg(mem_ptr) );
7412  ins_pipe( pipe_cmpxchg );
7413%}
7414
// GetAndAddB whose result is unused (per the predicate): emitted as a plain
// byte ADD of an immediate to memory, with a LOCK prefix when os::is_MP().
// Flags are declared killed.
7415instruct xaddB_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7416  predicate(n->as_LoadStore()->result_not_used());
7417  match(Set dummy (GetAndAddB mem add));
7418  effect(KILL cr);
7419  format %{ "ADDB  [$mem],$add" %}
7420  ins_encode %{
7421    if (os::is_MP()) { __ lock(); }
7422    __ addb($mem$$Address, $add$$constant);
7423  %}
7424  ins_pipe( pipe_cmpxchg );
7425%}
7426
7427// Important to match to xRegI: only 8-bit regs.
// GetAndAddB with a used result: XADDB on memory, with a LOCK prefix when
// os::is_MP(). newval is both the addend and the destination of the result.
// Flags are declared killed.
7428instruct xaddB( memory mem, xRegI newval, eFlagsReg cr) %{
7429  match(Set newval (GetAndAddB mem newval));
7430  effect(KILL cr);
7431  format %{ "XADDB  [$mem],$newval" %}
7432  ins_encode %{
7433    if (os::is_MP()) { __ lock(); }
7434    __ xaddb($mem$$Address, $newval$$Register);
7435  %}
7436  ins_pipe( pipe_cmpxchg );
7437%}
7438
// GetAndAddS whose result is unused (per the predicate): emitted as a 16-bit
// ADD of an immediate to memory, with a LOCK prefix when os::is_MP().
// Note: the format prints "ADDS" while the emitted instruction is addw.
// Flags are declared killed.
7439instruct xaddS_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7440  predicate(n->as_LoadStore()->result_not_used());
7441  match(Set dummy (GetAndAddS mem add));
7442  effect(KILL cr);
7443  format %{ "ADDS  [$mem],$add" %}
7444  ins_encode %{
7445    if (os::is_MP()) { __ lock(); }
7446    __ addw($mem$$Address, $add$$constant);
7447  %}
7448  ins_pipe( pipe_cmpxchg );
7449%}
7450
// GetAndAddS with a used result: 16-bit XADD on memory, with a LOCK prefix
// when os::is_MP(). newval is both the addend and the destination of the
// result. Note: the format prints "XADDS" while the emitted instruction is
// xaddw. Flags are declared killed.
7451instruct xaddS( memory mem, rRegI newval, eFlagsReg cr) %{
7452  match(Set newval (GetAndAddS mem newval));
7453  effect(KILL cr);
7454  format %{ "XADDS  [$mem],$newval" %}
7455  ins_encode %{
7456    if (os::is_MP()) { __ lock(); }
7457    __ xaddw($mem$$Address, $newval$$Register);
7458  %}
7459  ins_pipe( pipe_cmpxchg );
7460%}
7461
// GetAndAddI whose result is unused (per the predicate): emitted as a 32-bit
// ADD of an immediate to memory, with a LOCK prefix when os::is_MP().
// Flags are declared killed.
7462instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7463  predicate(n->as_LoadStore()->result_not_used());
7464  match(Set dummy (GetAndAddI mem add));
7465  effect(KILL cr);
7466  format %{ "ADDL  [$mem],$add" %}
7467  ins_encode %{
7468    if (os::is_MP()) { __ lock(); }
7469    __ addl($mem$$Address, $add$$constant);
7470  %}
7471  ins_pipe( pipe_cmpxchg );
7472%}
7473
// GetAndAddI with a used result: 32-bit XADD on memory, with a LOCK prefix
// when os::is_MP(). newval is both the addend and the destination of the
// result. Flags are declared killed.
7474instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7475  match(Set newval (GetAndAddI mem newval));
7476  effect(KILL cr);
7477  format %{ "XADDL  [$mem],$newval" %}
7478  ins_encode %{
7479    if (os::is_MP()) { __ lock(); }
7480    __ xaddl($mem$$Address, $newval$$Register);
7481  %}
7482  ins_pipe( pipe_cmpxchg );
7483%}
7484
7485// Important to match to xRegI: only 8-bit regs.
// GetAndSetB: byte exchange between $newval and memory; the previous memory
// value ends up in $newval. No LOCK prefix is emitted here and no flags
// effect is declared.
7486instruct xchgB( memory mem, xRegI newval) %{
7487  match(Set newval (GetAndSetB mem newval));
7488  format %{ "XCHGB  $newval,[$mem]" %}
7489  ins_encode %{
7490    __ xchgb($newval$$Register, $mem$$Address);
7491  %}
7492  ins_pipe( pipe_cmpxchg );
7493%}
7494
// GetAndSetS: 16-bit exchange between $newval and memory; the previous memory
// value ends up in $newval. No LOCK prefix is emitted here and no flags
// effect is declared.
7495instruct xchgS( memory mem, rRegI newval) %{
7496  match(Set newval (GetAndSetS mem newval));
7497  format %{ "XCHGW  $newval,[$mem]" %}
7498  ins_encode %{
7499    __ xchgw($newval$$Register, $mem$$Address);
7500  %}
7501  ins_pipe( pipe_cmpxchg );
7502%}
7503
// GetAndSetI: 32-bit exchange between $newval and memory; the previous memory
// value ends up in $newval. No LOCK prefix is emitted here and no flags
// effect is declared.
7504instruct xchgI( memory mem, rRegI newval) %{
7505  match(Set newval (GetAndSetI mem newval));
7506  format %{ "XCHGL  $newval,[$mem]" %}
7507  ins_encode %{
7508    __ xchgl($newval$$Register, $mem$$Address);
7509  %}
7510  ins_pipe( pipe_cmpxchg );
7511%}
7512
// GetAndSetP: pointer exchange between $newval and memory, emitted with the
// same 32-bit xchgl as xchgI. No LOCK prefix is emitted here and no flags
// effect is declared.
7513instruct xchgP( memory mem, pRegP newval) %{
7514  match(Set newval (GetAndSetP mem newval));
7515  format %{ "XCHGL  $newval,[$mem]" %}
7516  ins_encode %{
7517    __ xchgl($newval$$Register, $mem$$Address);
7518  %}
7519  ins_pipe( pipe_cmpxchg );
7520%}
7521
7522//----------Subtraction Instructions-------------------------------------------
7523
7524// Integer Subtraction Instructions
// dst = dst - src, register/register form (primary opcode 0x2B, 2 bytes).
// Flags are declared killed.
7525instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7526  match(Set dst (SubI dst src));
7527  effect(KILL cr);
7528
7529  size(2);
7530  format %{ "SUB    $dst,$src" %}
7531  opcode(0x2B);
7532  ins_encode( OpcP, RegReg( dst, src) );
7533  ins_pipe( ialu_reg_reg );
7534%}
7535
// dst = dst - immediate. The encoding goes through OpcSErm/Con8or32, so no
// fixed size() is declared. Flags are declared killed.
7536instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7537  match(Set dst (SubI dst src));
7538  effect(KILL cr);
7539
7540  format %{ "SUB    $dst,$src" %}
7541  opcode(0x81,0x05);  /* Opcode 81 /5 */
7542  // ins_encode( RegImm( dst, src) );
7543  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7544  ins_pipe( ialu_reg );
7545%}
7546
// dst = dst - [src]: folds the LoadI of the subtrahend into the SUB
// (primary opcode 0x2B with a memory operand). Flags are declared killed.
7547instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7548  match(Set dst (SubI dst (LoadI src)));
7549  effect(KILL cr);
7550
7551  ins_cost(125);
7552  format %{ "SUB    $dst,$src" %}
7553  opcode(0x2B);
7554  ins_encode( OpcP, RegMem( dst, src) );
7555  ins_pipe( ialu_reg_mem );
7556%}
7557
// [dst] = [dst] - src: matches a load/subtract/store on the same memory
// operand and emits a single memory-destination SUB (opcode 0x29).
// Flags are declared killed.
7558instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7559  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7560  effect(KILL cr);
7561
7562  ins_cost(150);
7563  format %{ "SUB    $dst,$src" %}
7564  opcode(0x29);  /* Opcode 29 /r */
7565  ins_encode( OpcP, RegMem( src, dst ) );
7566  ins_pipe( ialu_mem_reg );
7567%}
7568
7569// Subtract from a pointer
// Matches AddP of a pointer with (0 - src) and emits it as a single SUB of
// src from the pointer register. Flags are declared killed.
7570instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7571  match(Set dst (AddP dst (SubI zero src)));
7572  effect(KILL cr);
7573
7574  size(2);
7575  format %{ "SUB    $dst,$src" %}
7576  opcode(0x2B);
7577  ins_encode( OpcP, RegReg( dst, src) );
7578  ins_pipe( ialu_reg_reg );
7579%}
7580
// Integer negate: matches (0 - dst) and emits NEG on dst in place.
// Flags are declared killed.
7581instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7582  match(Set dst (SubI zero dst));
7583  effect(KILL cr);
7584
7585  size(2);
7586  format %{ "NEG    $dst" %}
7587  opcode(0xF7,0x03);  // Opcode F7 /3
7588  ins_encode( OpcP, RegOpc( dst ) );
7589  ins_pipe( ialu_reg );
7590%}
7591
7592//----------Multiplication/Division Instructions-------------------------------
7593// Integer Multiplication Instructions
7594// Multiply Register
// dst = dst * src, two-operand IMUL. The opcode bytes are listed as
// (primary 0xAF, secondary 0x0F) and emitted secondary-first via OpcS, OpcP.
// Flags are declared killed.
7595instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7596  match(Set dst (MulI dst src));
7597  effect(KILL cr);
7598
7599  size(3);
7600  ins_cost(300);
7601  format %{ "IMUL   $dst,$src" %}
7602  opcode(0xAF, 0x0F);
7603  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7604  ins_pipe( ialu_reg_reg_alu0 );
7605%}
7606
7607// Multiply 32-bit Immediate
// dst = src * imm, three-operand IMUL; dst need not equal src.
// Flags are declared killed.
7608instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7609  match(Set dst (MulI src imm));
7610  effect(KILL cr);
7611
7612  ins_cost(300);
7613  format %{ "IMUL   $dst,$src,$imm" %}
7614  opcode(0x69);  /* 69 /r id */
7615  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7616  ins_pipe( ialu_reg_reg_alu0 );
7617%}
7618
// Loads only the low word of an immL32 constant into the
// eADXRegL_low_only operand (EAX per the format). It supplies the src2
// operand of the mulI_imm_high / mulI_imm_RShift_high rules below.
// Flags are declared killed.
7619instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7620  match(Set dst src);
7621  effect(KILL cr);
7622
7623  // Note that this is artificially increased to make it more expensive than loadConL
7624  ins_cost(250);
7625  format %{ "MOV    EAX,$src\t// low word only" %}
7626  opcode(0xB8);
7627  ins_encode( LdImmL_Lo(dst, src) );
7628  ins_pipe( ialu_reg_fat );
7629%}
7630
7631// Multiply by 32-bit Immediate, taking the shifted high order results
7632//  (special case for shift by 32)
// Matches (int)(((long)src1 * con) >> 32). The predicate walks the match
// tree down to the MulL's second input and requires it to be a ConL whose
// value lies within [min_jint, max_jint]. The result is taken from the
// eDXRegI operand. Flags are declared killed.
7633instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7634  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7635  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7636             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7637             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7638  effect(USE src1, KILL cr);
7639
7640  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7641  ins_cost(0*100 + 1*400 - 150);
7642  format %{ "IMUL   EDX:EAX,$src1" %}
7643  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7644  ins_pipe( pipe_slow );
7645%}
7646
7647// Multiply by 32-bit Immediate, taking the shifted high order results
// Same pattern as mulI_imm_high but for a shift count operand of class
// immI_32_63: per the format, the high word is additionally shifted right
// arithmetically by ($cnt - 32). The predicate requires the multiplier to be
// a ConL within [min_jint, max_jint]. Flags are declared killed.
7648instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7649  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7650  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7651             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7652             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7653  effect(USE src1, KILL cr);
7654
7655  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7656  ins_cost(1*100 + 1*400 - 150);
7657  format %{ "IMUL   EDX:EAX,$src1\n\t"
7658            "SAR    EDX,$cnt-32" %}
7659  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7660  ins_pipe( pipe_slow );
7661%}
7662
7663// Multiply Memory 32-bit Immediate
// dst = [src] * imm: three-operand IMUL with the LoadI folded into the
// memory operand. Flags are declared killed.
7664instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7665  match(Set dst (MulI (LoadI src) imm));
7666  effect(KILL cr);
7667
7668  ins_cost(300);
7669  format %{ "IMUL   $dst,$src,$imm" %}
7670  opcode(0x69);  /* 69 /r id */
7671  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7672  ins_pipe( ialu_reg_mem_alu0 );
7673%}
7674
7675// Multiply Memory
// dst = dst * [src]: two-operand IMUL with the LoadI folded into the memory
// operand. Flags are declared killed.
7676instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7677  match(Set dst (MulI dst (LoadI src)));
7678  effect(KILL cr);
7679
7680  ins_cost(350);
7681  format %{ "IMUL   $dst,$src" %}
7682  opcode(0xAF, 0x0F);
7683  ins_encode( OpcS, OpcP, RegMem( dst, src) );
7684  ins_pipe( ialu_reg_mem_alu0 );
7685%}
7686
7687// Multiply Register Int to Long
// Signed 32x32 -> 64 multiply: one factor is pinned to the eAXRegI operand,
// the other to a register outside EAX/EDX (nadxRegI), and the product is
// defined in the eADXRegL operand. Flags are declared killed.
7688instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7689  // Basic Idea: long = (long)int * (long)int
7690  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7691  effect(DEF dst, USE src, USE src1, KILL flags);
7692
7693  ins_cost(300);
7694  format %{ "IMUL   $dst,$src1" %}
7695
7696  ins_encode( long_int_multiply( dst, src1 ) );
7697  ins_pipe( ialu_reg_reg_alu0 );
7698%}
7699
// Unsigned 32x32 -> 64 multiply: both ConvI2L inputs are masked with the
// immL_32bits operand, so the product is formed with MUL (per the format)
// instead of IMUL. Flags are declared killed.
7700instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7701  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7702  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7703  effect(KILL flags);
7704
7705  ins_cost(300);
7706  format %{ "MUL    $dst,$src1" %}
7707
7708  ins_encode( long_uint_multiply(dst, src1) );
7709  ins_pipe( ialu_reg_reg_alu0 );
7710%}
7711
7712// Multiply Register Long
// General 64x64 -> 64 multiply with dst in the eADXRegL operand. Needs one
// scratch register (TEMP tmp) for the cross products. Flags are declared
// killed.
7713instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7714  match(Set dst (MulL dst src));
7715  effect(KILL cr, TEMP tmp);
7716  ins_cost(4*100+3*400);
7717// Basic idea: lo(result) = lo(x_lo * y_lo)
7718//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7719  format %{ "MOV    $tmp,$src.lo\n\t"
7720            "IMUL   $tmp,EDX\n\t"
7721            "MOV    EDX,$src.hi\n\t"
7722            "IMUL   EDX,EAX\n\t"
7723            "ADD    $tmp,EDX\n\t"
7724            "MUL    EDX:EAX,$src.lo\n\t"
7725            "ADD    EDX,$tmp" %}
7726  ins_encode( long_multiply( dst, src, tmp ) );
7727  ins_pipe( pipe_slow );
7728%}
7729
7730// Multiply Register Long where the left operand's high 32 bits are zero
// 64-bit multiply specialised by the predicate for a left input whose high
// 32 bits are zero: only one cross product is needed.
7731instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7732  predicate(is_operand_hi32_zero(n->in(1)));
7733  match(Set dst (MulL dst src));
7734  effect(KILL cr, TEMP tmp);
7735  ins_cost(2*100+2*400);
7736// Basic idea: lo(result) = lo(x_lo * y_lo)
7737//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7738  format %{ "MOV    $tmp,$src.hi\n\t"
7739            "IMUL   $tmp,EAX\n\t"
7740            "MUL    EDX:EAX,$src.lo\n\t"
7741            "ADD    EDX,$tmp" %}
7742  ins_encode %{
    // tmp = y_hi * x_lo (cross product), computed before MUL overwrites EAX.
7743    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7744    __ imull($tmp$$Register, rax);
    // EDX:EAX = x_lo * y_lo, then fold the cross product into the high word.
7745    __ mull($src$$Register);
7746    __ addl(rdx, $tmp$$Register);
7747  %}
7748  ins_pipe( pipe_slow );
7749%}
7750
7751// Multiply Register Long where the right operand's high 32 bits are zero
// 64-bit multiply specialised by the predicate for a right input whose high
// 32 bits are zero: only one cross product is needed.
7752instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7753  predicate(is_operand_hi32_zero(n->in(2)));
7754  match(Set dst (MulL dst src));
7755  effect(KILL cr, TEMP tmp);
7756  ins_cost(2*100+2*400);
7757// Basic idea: lo(result) = lo(x_lo * y_lo)
7758//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7759  format %{ "MOV    $tmp,$src.lo\n\t"
7760            "IMUL   $tmp,EDX\n\t"
7761            "MUL    EDX:EAX,$src.lo\n\t"
7762            "ADD    EDX,$tmp" %}
7763  ins_encode %{
    // tmp = y_lo * x_hi (cross product), computed before MUL overwrites EDX.
7764    __ movl($tmp$$Register, $src$$Register);
7765    __ imull($tmp$$Register, rdx);
    // EDX:EAX = x_lo * y_lo, then fold the cross product into the high word.
7766    __ mull($src$$Register);
7767    __ addl(rdx, $tmp$$Register);
7768  %}
7769  ins_pipe( pipe_slow );
7770%}
7771
7772// Multiply Register Long where the left and the right operands' high 32 bits are zero
// 64-bit multiply specialised by the predicate for the case where both
// inputs have zero high words: a single MUL by the low word of src
// suffices and no temp is needed. Flags are declared killed.
7773instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7774  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7775  match(Set dst (MulL dst src));
7776  effect(KILL cr);
7777  ins_cost(1*400);
7778// Basic idea: lo(result) = lo(x_lo * y_lo)
7779//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7780  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7781  ins_encode %{
7782    __ mull($src$$Register);
7783  %}
7784  ins_pipe( pipe_slow );
7785%}
7786
7787// Multiply Register Long by small constant
// 64-bit multiply by a constant of operand class immL_127. Declares a fixed
// 12-byte encoding and one scratch register (TEMP tmp). Flags are declared
// killed.
7788instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7789  match(Set dst (MulL dst src));
7790  effect(KILL cr, TEMP tmp);
7791  ins_cost(2*100+2*400);
7792  size(12);
7793// Basic idea: lo(result) = lo(src * EAX)
7794//             hi(result) = hi(src * EAX) + lo(src * EDX)
7795  format %{ "IMUL   $tmp,EDX,$src\n\t"
7796            "MOV    EDX,$src\n\t"
7797            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7798            "ADD    EDX,$tmp" %}
7799  ins_encode( long_multiply_con( dst, src, tmp ) );
7800  ins_pipe( pipe_slow );
7801%}
7802
7803// Integer DIV with Register
// Signed 32-bit divide: dividend and quotient in the eAXRegI operand, divisor
// in the eCXRegI operand. Per the format, the sequence checks for
// EAX == 0x80000000 with ECX == -1 and skips the IDIV in that case.
// EDX and flags are declared killed.
7804instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7805  match(Set rax (DivI rax div));
7806  effect(KILL rdx, KILL cr);
7807  size(26);
7808  ins_cost(30*100+10*100);
7809  format %{ "CMP    EAX,0x80000000\n\t"
7810            "JNE,s  normal\n\t"
7811            "XOR    EDX,EDX\n\t"
7812            "CMP    ECX,-1\n\t"
7813            "JE,s   done\n"
7814    "normal: CDQ\n\t"
7815            "IDIV   $div\n\t"
7816    "done:"        %}
7817  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7818  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7819  ins_pipe( ialu_reg_reg_alu0 );
7820%}
7821
7822// Divide Register Long
// General 64-bit divide. Per the format, both operands are pushed and
// SharedRuntime::ldiv is called, after which 16 bytes of arguments are
// popped. ECX, EBX and flags are declared killed.
7823instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7824  match(Set dst (DivL src1 src2));
7825  effect( KILL cr, KILL cx, KILL bx );
7826  ins_cost(10000);
7827  format %{ "PUSH   $src1.hi\n\t"
7828            "PUSH   $src1.lo\n\t"
7829            "PUSH   $src2.hi\n\t"
7830            "PUSH   $src2.lo\n\t"
7831            "CALL   SharedRuntime::ldiv\n\t"
7832            "ADD    ESP,16" %}
7833  ins_encode( long_div(src1,src2) );
7834  ins_pipe( pipe_slow );
7835%}
7836
7837// Integer DIVMOD with Register, both quotient and mod results
// Combined quotient/remainder rule for DivModI. It uses the same encoding and
// size as divI_eReg, but rdx is not declared killed because it is a
// result operand here. Flags are declared killed.
7838instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7839  match(DivModI rax div);
7840  effect(KILL cr);
7841  size(26);
7842  ins_cost(30*100+10*100);
7843  format %{ "CMP    EAX,0x80000000\n\t"
7844            "JNE,s  normal\n\t"
7845            "XOR    EDX,EDX\n\t"
7846            "CMP    ECX,-1\n\t"
7847            "JE,s   done\n"
7848    "normal: CDQ\n\t"
7849            "IDIV   $div\n\t"
7850    "done:"        %}
7851  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7852  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7853  ins_pipe( pipe_slow );
7854%}
7855
7856// Integer MOD with Register
// Signed 32-bit remainder: result in the eDXRegI operand, dividend in
// eAXRegI (declared killed), divisor in eCXRegI. Flags are declared killed.
// NOTE(review): ins_encode and size(26) are identical to divI_eReg, whose
// format lists the full min_jint / -1 guard sequence; the format here shows
// only CDQ/IDIV and so appears to be abbreviated - confirm.
7857instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7858  match(Set rdx (ModI rax div));
7859  effect(KILL rax, KILL cr);
7860
7861  size(26);
7862  ins_cost(300);
7863  format %{ "CDQ\n\t"
7864            "IDIV   $div" %}
7865  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7866  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7867  ins_pipe( ialu_reg_reg_alu0 );
7868%}
7869
7870// Remainder Register Long
// General 64-bit remainder. Per the format, both operands are pushed and
// SharedRuntime::lrem is called, after which 16 bytes of arguments are
// popped. ECX, EBX and flags are declared killed.
7871instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7872  match(Set dst (ModL src1 src2));
7873  effect( KILL cr, KILL cx, KILL bx );
7874  ins_cost(10000);
7875  format %{ "PUSH   $src1.hi\n\t"
7876            "PUSH   $src1.lo\n\t"
7877            "PUSH   $src2.hi\n\t"
7878            "PUSH   $src2.lo\n\t"
7879            "CALL   SharedRuntime::lrem\n\t"
7880            "ADD    ESP,16" %}
7881  ins_encode( long_mod(src1,src2) );
7882  ins_pipe( pipe_slow );
7883%}
7884
7885// Divide Register Long (no special case since divisor != -1)
// Inline 64-bit divide of EDX:EAX by a 32-bit constant. The divisor's
// absolute value is loaded into $tmp and the quotient is built with one or
// two unsigned 32-bit DIVs; $tmp2 carries the high word of the quotient.
// The result is negated at the end when the constant is negative. The
// encoder asserts that the constant is not 0, -1 or min_jint.
7886instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7887  match(Set dst (DivL dst imm));
7888  effect( TEMP tmp, TEMP tmp2, KILL cr );
7889  ins_cost(1000);
7890  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7891            "XOR    $tmp2,$tmp2\n\t"
7892            "CMP    $tmp,EDX\n\t"
7893            "JA,s   fast\n\t"
7894            "MOV    $tmp2,EAX\n\t"
7895            "MOV    EAX,EDX\n\t"
7896            "MOV    EDX,0\n\t"
7897            "JLE,s  pos\n\t"
7898            "LNEG   EAX : $tmp2\n\t"
7899            "DIV    $tmp # unsigned division\n\t"
7900            "XCHG   EAX,$tmp2\n\t"
7901            "DIV    $tmp\n\t"
7902            "LNEG   $tmp2 : EAX\n\t"
7903            "JMP,s  done\n"
7904    "pos:\n\t"
7905            "DIV    $tmp\n\t"
7906            "XCHG   EAX,$tmp2\n"
7907    "fast:\n\t"
7908            "DIV    $tmp\n"
7909    "done:\n\t"
7910            "MOV    EDX,$tmp2\n\t"
7911            "NEG    EDX:EAX # if $imm < 0" %}
7912  ins_encode %{
7913    int con = (int)$imm$$constant;
7914    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7915    int pcon = (con > 0) ? con : -con;
7916    Label Lfast, Lpos, Ldone;
7917
    // tmp = |divisor|; tmp2 = 0 is the high quotient word for the fast path.
7918    __ movl($tmp$$Register, pcon);
7919    __ xorl($tmp2$$Register,$tmp2$$Register);
7920    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7921    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7922
    // Slow path: divide the high word first. The flags from the cmpl above
    // are still live for the jccb below.
7923    __ movl($tmp2$$Register, $dst$$Register); // save
7924    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7925    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7926    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7927
7928    // Negative dividend.
7929    // convert value to positive to use unsigned division
7930    __ lneg($dst$$Register, $tmp2$$Register);
7931    __ divl($tmp$$Register);
7932    __ xchgl($dst$$Register, $tmp2$$Register);
7933    __ divl($tmp$$Register);
7934    // revert result back to negative
7935    __ lneg($tmp2$$Register, $dst$$Register);
7936    __ jmpb(Ldone);
7937
7938    __ bind(Lpos);
7939    __ divl($tmp$$Register); // Use unsigned division
7940    __ xchgl($dst$$Register, $tmp2$$Register);
7941    // Fall through for final divide, tmp2 has 32 bit hi result
7942
7943    __ bind(Lfast);
7944    // fast path: src is positive
7945    __ divl($tmp$$Register); // Use unsigned division
7946
7947    __ bind(Ldone);
    // Install the high word of the quotient; negate if the divisor was negative.
7948    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7949    if (con < 0) {
7950      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7951    }
7952  %}
7953  ins_pipe( pipe_slow );
7954%}
7955
7956// Remainder Register Long (remainder fit into 32 bits)
// Inline 64-bit remainder of EDX:EAX by a 32-bit constant. The divisor's
// absolute value is loaded into $tmp and the remainder is obtained with one
// or two unsigned 32-bit DIVs; it is then moved to the low word and
// sign-extended into the high word. The encoder asserts that the constant is
// not 0, -1 or min_jint.
7957instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7958  match(Set dst (ModL dst imm));
7959  effect( TEMP tmp, TEMP tmp2, KILL cr );
7960  ins_cost(1000);
7961  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7962            "CMP    $tmp,EDX\n\t"
7963            "JA,s   fast\n\t"
7964            "MOV    $tmp2,EAX\n\t"
7965            "MOV    EAX,EDX\n\t"
7966            "MOV    EDX,0\n\t"
7967            "JLE,s  pos\n\t"
7968            "LNEG   EAX : $tmp2\n\t"
7969            "DIV    $tmp # unsigned division\n\t"
7970            "MOV    EAX,$tmp2\n\t"
7971            "DIV    $tmp\n\t"
7972            "NEG    EDX\n\t"
7973            "JMP,s  done\n"
7974    "pos:\n\t"
7975            "DIV    $tmp\n\t"
7976            "MOV    EAX,$tmp2\n"
7977    "fast:\n\t"
7978            "DIV    $tmp\n"
7979    "done:\n\t"
7980            "MOV    EAX,EDX\n\t"
7981            "SAR    EDX,31\n\t" %}
7982  ins_encode %{
7983    int con = (int)$imm$$constant;
7984    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7985    int pcon = (con > 0) ? con : -con;
7986    Label  Lfast, Lpos, Ldone;
7987
7988    __ movl($tmp$$Register, pcon);
7989    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7990    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7991
    // Slow path: divide the high word first. The flags from the cmpl above
    // are still live for the jccb below.
7992    __ movl($tmp2$$Register, $dst$$Register); // save
7993    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7994    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7995    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7996
7997    // Negative dividend.
7998    // convert value to positive to use unsigned division
7999    __ lneg($dst$$Register, $tmp2$$Register);
8000    __ divl($tmp$$Register);
8001    __ movl($dst$$Register, $tmp2$$Register);
8002    __ divl($tmp$$Register);
8003    // revert remainder back to negative
8004    __ negl(HIGH_FROM_LOW($dst$$Register));
8005    __ jmpb(Ldone);
8006
8007    __ bind(Lpos);
8008    __ divl($tmp$$Register);
8009    __ movl($dst$$Register, $tmp2$$Register);
8010
8011    __ bind(Lfast);
8012    // fast path: src is positive
8013    __ divl($tmp$$Register);
8014
8015    __ bind(Ldone);
    // Move the remainder into the low word and sign-extend it.
8016    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8017    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8018
8019  %}
8020  ins_pipe( pipe_slow );
8021%}
8022
8023// Integer Shift Instructions
8024// Shift Left by one
// dst <<= 1 using the shift-by-one form (D1 /4), 2 bytes.
// Flags are declared killed.
8025instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8026  match(Set dst (LShiftI dst shift));
8027  effect(KILL cr);
8028
8029  size(2);
8030  format %{ "SHL    $dst,$shift" %}
8031  opcode(0xD1, 0x4);  /* D1 /4 */
8032  ins_encode( OpcP, RegOpc( dst ) );
8033  ins_pipe( ialu_reg );
8034%}
8035
8036// Shift Left by 8-bit immediate
// dst <<= imm8 using the immediate form (C1 /4 ib), 3 bytes.
// Flags are declared killed.
8037instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8038  match(Set dst (LShiftI dst shift));
8039  effect(KILL cr);
8040
8041  size(3);
8042  format %{ "SHL    $dst,$shift" %}
8043  opcode(0xC1, 0x4);  /* C1 /4 ib */
8044  ins_encode( RegOpcImm( dst, shift) );
8045  ins_pipe( ialu_reg );
8046%}
8047
8048// Shift Left by variable
// dst <<= shift with the count pinned to the eCXRegI operand (D3 /4).
// The count register is implicit in the encoding, so only dst is encoded.
// Flags are declared killed.
8049instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8050  match(Set dst (LShiftI dst shift));
8051  effect(KILL cr);
8052
8053  size(2);
8054  format %{ "SHL    $dst,$shift" %}
8055  opcode(0xD3, 0x4);  /* D3 /4 */
8056  ins_encode( OpcP, RegOpc( dst ) );
8057  ins_pipe( ialu_reg_reg );
8058%}
8059
8060// Arithmetic shift right by one
// dst >>= 1 (arithmetic) using the shift-by-one form (D1 /7), 2 bytes.
// Flags are declared killed.
8061instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8062  match(Set dst (RShiftI dst shift));
8063  effect(KILL cr);
8064
8065  size(2);
8066  format %{ "SAR    $dst,$shift" %}
8067  opcode(0xD1, 0x7);  /* D1 /7 */
8068  ins_encode( OpcP, RegOpc( dst ) );
8069  ins_pipe( ialu_reg );
8070%}
8071
8072// Arithmetic shift right by one
// Memory-destination form: matches load / arithmetic-shift-right-by-one /
// store on the same memory operand and emits a single SAR on memory (D1 /7).
// Flags are declared killed.
8073instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8074  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8075  effect(KILL cr);
8076  format %{ "SAR    $dst,$shift" %}
8077  opcode(0xD1, 0x7);  /* D1 /7 */
8078  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8079  ins_pipe( ialu_mem_imm );
8080%}
8081
8082// Arithmetic Shift Right by 8-bit immediate
// dst >>= imm8 (arithmetic) using the immediate form (C1 /7 ib), 3 bytes.
// Flags are declared killed.
// NOTE(review): this register form uses pipe class ialu_mem_imm, whereas the
// sibling register/immediate shifts (salI_eReg_imm, shrI_eReg_imm) use
// ialu_reg - confirm whether that is intentional.
8083instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8084  match(Set dst (RShiftI dst shift));
8085  effect(KILL cr);
8086
8087  size(3);
8088  format %{ "SAR    $dst,$shift" %}
8089  opcode(0xC1, 0x7);  /* C1 /7 ib */
8090  ins_encode( RegOpcImm( dst, shift ) );
8091  ins_pipe( ialu_mem_imm );
8092%}
8093
8094// Arithmetic Shift Right by 8-bit immediate
// Memory-destination form: matches load / arithmetic-shift-right-by-imm8 /
// store on the same memory operand and emits a single SAR on memory
// (C1 /7 ib). Flags are declared killed.
8095instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8096  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8097  effect(KILL cr);
8098
8099  format %{ "SAR    $dst,$shift" %}
8100  opcode(0xC1, 0x7);  /* C1 /7 ib */
8101  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8102  ins_pipe( ialu_mem_imm );
8103%}
8104
8105// Arithmetic Shift Right by variable
// dst >>= shift (arithmetic) with the count pinned to the eCXRegI operand
// (D3 /7). Only dst is encoded. Flags are declared killed.
8106instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8107  match(Set dst (RShiftI dst shift));
8108  effect(KILL cr);
8109
8110  size(2);
8111  format %{ "SAR    $dst,$shift" %}
8112  opcode(0xD3, 0x7);  /* D3 /7 */
8113  ins_encode( OpcP, RegOpc( dst ) );
8114  ins_pipe( ialu_reg_reg );
8115%}
8116
8117// Logical shift right by one
// dst >>>= 1 (logical) using the shift-by-one form (D1 /5), 2 bytes.
// Flags are declared killed.
8118instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8119  match(Set dst (URShiftI dst shift));
8120  effect(KILL cr);
8121
8122  size(2);
8123  format %{ "SHR    $dst,$shift" %}
8124  opcode(0xD1, 0x5);  /* D1 /5 */
8125  ins_encode( OpcP, RegOpc( dst ) );
8126  ins_pipe( ialu_reg );
8127%}
8128
8129// Logical Shift Right by 8-bit immediate
// dst >>>= imm8 (logical) using the immediate form (C1 /5 ib), 3 bytes.
// Flags are declared killed.
8130instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
8131  match(Set dst (URShiftI dst shift));
8132  effect(KILL cr);
8133
8134  size(3);
8135  format %{ "SHR    $dst,$shift" %}
8136  opcode(0xC1, 0x5);  /* C1 /5 ib */
8137  ins_encode( RegOpcImm( dst, shift) );
8138  ins_pipe( ialu_reg );
8139%}
8140
8141
8142// Shift Left by 24, followed by Arithmetic Shift Right by 24.
8143// This idiom is used by the compiler for the i2b bytecode.
// The pair of shifts is matched as one sign-extending byte move (movsbl).
// src is restricted to the xRegI operand class; no flags effect is declared.
8144instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
8145  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8146
8147  size(3);
8148  format %{ "MOVSX  $dst,$src :8" %}
8149  ins_encode %{
8150    __ movsbl($dst$$Register, $src$$Register);
8151  %}
8152  ins_pipe(ialu_reg_reg);
8153%}
8154
8155// Shift Left by 16, followed by Arithmetic Shift Right by 16.
8156// This idiom is used by the compiler for the i2s bytecode.
// The pair of shifts is matched as one sign-extending 16-bit move (movswl).
// No flags effect is declared.
8157instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
8158  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8159
8160  size(3);
8161  format %{ "MOVSX  $dst,$src :16" %}
8162  ins_encode %{
8163    __ movswl($dst$$Register, $src$$Register);
8164  %}
8165  ins_pipe(ialu_reg_reg);
8166%}
8167
8168
8169// Logical Shift Right by variable
// dst >>>= shift (logical) with the count pinned to the eCXRegI operand
// (D3 /5). Only dst is encoded. Flags are declared killed.
8170instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
8171  match(Set dst (URShiftI dst shift));
8172  effect(KILL cr);
8173
8174  size(2);
8175  format %{ "SHR    $dst,$shift" %}
8176  opcode(0xD3, 0x5);  /* D3 /5 */
8177  ins_encode( OpcP, RegOpc( dst ) );
8178  ins_pipe( ialu_reg_reg );
8179%}
8180
8181
8182//----------Logical Instructions-----------------------------------------------
8183//----------Integer Logical Instructions---------------------------------------
8184// And Instructions
8185// And Register with Register
// dst = dst & src, register/register form (primary opcode 0x23, 2 bytes).
// Flags are declared killed.
8186instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8187  match(Set dst (AndI dst src));
8188  effect(KILL cr);
8189
8190  size(2);
8191  format %{ "AND    $dst,$src" %}
8192  opcode(0x23);
8193  ins_encode( OpcP, RegReg( dst, src) );
8194  ins_pipe( ialu_reg_reg );
8195%}
8196
8197// And Register with Immediate
// dst = dst & constant. No size() is declared, so the length is not fixed here.
// NOTE(review): OpcSErm/Con8or32 are encoding classes defined outside this
// chunk; presumably they choose the sign-extended imm8 form when the constant
// fits and the imm32 form otherwise -- confirm.
8198instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8199  match(Set dst (AndI dst src));
8200  effect(KILL cr);
8201
8202  format %{ "AND    $dst,$src" %}
8203  opcode(0x81,0x04);  /* Opcode 81 /4 */
8204  // ins_encode( RegImm( dst, src) );
8205  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8206  ins_pipe( ialu_reg );
8207%}
8208
8209// And Register with Memory
// dst = dst & [src]: folds the LoadI into the AND as a memory source operand.
// Costed at 125 so the register-register form is preferred when available.
8210instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8211  match(Set dst (AndI dst (LoadI src)));
8212  effect(KILL cr);
8213
8214  ins_cost(125);
8215  format %{ "AND    $dst,$src" %}
8216  opcode(0x23);
8217  ins_encode( OpcP, RegMem( dst, src) );
8218  ins_pipe( ialu_reg_mem );
8219%}
8220
8221// And Memory with Register
// [dst] = [dst] & src: read-modify-write on memory. The match rule requires
// the load and the store to use the same memory operand (dst).
// Opcode 0x21 is the "r/m32, r32" direction, hence RegMem( src, dst ).
8222instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8223  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8224  effect(KILL cr);
8225
8226  ins_cost(150);
8227  format %{ "AND    $dst,$src" %}
8228  opcode(0x21);  /* Opcode 21 /r */
8229  ins_encode( OpcP, RegMem( src, dst ) );
8230  ins_pipe( ialu_mem_reg );
8231%}
8232
8233// And Memory with Immediate
// [dst] = [dst] & constant: read-modify-write on memory with an immediate.
// The secondary opcode (/4) is placed in the ModRM reg field by RMopc_Mem.
// NOTE(review): OpcSE/Con8or32 are defined outside this chunk; presumably they
// select the imm8 vs imm32 form based on the constant -- confirm.
8234instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8235  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8236  effect(KILL cr);
8237
8238  ins_cost(125);
8239  format %{ "AND    $dst,$src" %}
8240  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8241  // ins_encode( MemImm( dst, src) );
8242  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8243  ins_pipe( ialu_mem_imm );
8244%}
8245
8246// BMI1 instructions
// dst = ~src1 & src2. The ideal graph expresses the NOT as (src1 ^ -1); the
// minus_1 operand exists only to match that constant.
// Selected only when UseBMI1Instructions is set; non-destructive 3-operand form.
8247instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8248  match(Set dst (AndI (XorI src1 minus_1) src2));
8249  predicate(UseBMI1Instructions);
8250  effect(KILL cr);
8251
8252  format %{ "ANDNL  $dst, $src1, $src2" %}
8253
8254  ins_encode %{
8255    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8256  %}
8257  ins_pipe(ialu_reg);
8258%}
8259
// dst = ~src1 & [src2]: ANDN with the second source loaded from memory.
// Selected only when UseBMI1Instructions is set.
8260instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8261  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8262  predicate(UseBMI1Instructions);
8263  effect(KILL cr);
8264
8265  ins_cost(125);
8266  format %{ "ANDNL  $dst, $src1, $src2" %}
8267
8268  ins_encode %{
8269    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8270  %}
8271  ins_pipe(ialu_reg_mem);
8272%}
8273
// dst = src & -src (isolate the lowest set bit). The negation appears in the
// ideal graph as (0 - src); imm_zero exists only to match that constant.
// Selected only when UseBMI1Instructions is set.
8274instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8275  match(Set dst (AndI (SubI imm_zero src) src));
8276  predicate(UseBMI1Instructions);
8277  effect(KILL cr);
8278
8279  format %{ "BLSIL  $dst, $src" %}
8280
8281  ins_encode %{
8282    __ blsil($dst$$Register, $src$$Register);
8283  %}
8284  ins_pipe(ialu_reg);
8285%}
8286
// dst = [src] & -[src]: BLSI with a memory source. The rule names the same
// memory operand in both LoadI positions, and a single memory read is emitted.
// Selected only when UseBMI1Instructions is set.
8287instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8288  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8289  predicate(UseBMI1Instructions);
8290  effect(KILL cr);
8291
8292  ins_cost(125);
8293  format %{ "BLSIL  $dst, $src" %}
8294
8295  ins_encode %{
8296    __ blsil($dst$$Register, $src$$Address);
8297  %}
8298  ins_pipe(ialu_reg_mem);
8299%}
8300
// dst = (src - 1) ^ src (mask up to and including the lowest set bit).
// The decrement appears in the ideal graph as (src + -1); minus_1 exists only
// to match that constant. Selected only when UseBMI1Instructions is set.
8301instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8302%{
8303  match(Set dst (XorI (AddI src minus_1) src));
8304  predicate(UseBMI1Instructions);
8305  effect(KILL cr);
8306
8307  format %{ "BLSMSKL $dst, $src" %}
8308
8309  ins_encode %{
8310    __ blsmskl($dst$$Register, $src$$Register);
8311  %}
8312
8313  ins_pipe(ialu_reg);
8314%}
8315
// dst = ([src] - 1) ^ [src]: BLSMSK with a memory source. The rule names the
// same memory operand in both LoadI positions; one memory read is emitted.
// Selected only when UseBMI1Instructions is set.
8316instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8317%{
8318  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8319  predicate(UseBMI1Instructions);
8320  effect(KILL cr);
8321
8322  ins_cost(125);
8323  format %{ "BLSMSKL $dst, $src" %}
8324
8325  ins_encode %{
8326    __ blsmskl($dst$$Register, $src$$Address);
8327  %}
8328
8329  ins_pipe(ialu_reg_mem);
8330%}
8331
// dst = (src - 1) & src (clear the lowest set bit).
// The decrement appears in the ideal graph as (src + -1); minus_1 exists only
// to match that constant. Selected only when UseBMI1Instructions is set.
8332instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8333%{
8334  match(Set dst (AndI (AddI src minus_1) src) );
8335  predicate(UseBMI1Instructions);
8336  effect(KILL cr);
8337
8338  format %{ "BLSRL  $dst, $src" %}
8339
8340  ins_encode %{
8341    __ blsrl($dst$$Register, $src$$Register);
8342  %}
8343
8344  ins_pipe(ialu_reg);
8345%}
8346
// dst = ([src] - 1) & [src]: BLSR with a memory source. The rule names the
// same memory operand in both LoadI positions; one memory read is emitted.
// Selected only when UseBMI1Instructions is set.
8347instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8348%{
8349  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8350  predicate(UseBMI1Instructions);
8351  effect(KILL cr);
8352
8353  ins_cost(125);
8354  format %{ "BLSRL  $dst, $src" %}
8355
8356  ins_encode %{
8357    __ blsrl($dst$$Register, $src$$Address);
8358  %}
8359
8360  ins_pipe(ialu_reg_mem);
8361%}
8362
8363// Or Instructions
8364// Or Register with Register
// dst = dst | src, both in registers. Two-byte encoding: opcode 0x0B plus a
// ModRM byte. Flags are clobbered (KILL cr).
8365instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8366  match(Set dst (OrI dst src));
8367  effect(KILL cr);
8368
8369  size(2);
8370  format %{ "OR     $dst,$src" %}
8371  opcode(0x0B);
8372  ins_encode( OpcP, RegReg( dst, src) );
8373  ins_pipe( ialu_reg_reg );
8374%}
8375
// dst = dst | (int)src, where src is a pointer register. Matching the CastP2X
// here lets the pointer be OR-ed directly; the emitted encoding is the same
// register-register OR (0x0B) used by orI_eReg.
8376instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8377  match(Set dst (OrI dst (CastP2X src)));
8378  effect(KILL cr);
8379
8380  size(2);
8381  format %{ "OR     $dst,$src" %}
8382  opcode(0x0B);
8383  ins_encode( OpcP, RegReg( dst, src) );
8384  ins_pipe( ialu_reg_reg );
8385%}
8386
8387
8388// Or Register with Immediate
// dst = dst | constant. No size() is declared, so the length is not fixed here.
// NOTE(review): OpcSErm/Con8or32 are defined outside this chunk; presumably
// they select the imm8 vs imm32 form based on the constant -- confirm.
8389instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8390  match(Set dst (OrI dst src));
8391  effect(KILL cr);
8392
8393  format %{ "OR     $dst,$src" %}
8394  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8395  // ins_encode( RegImm( dst, src) );
8396  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8397  ins_pipe( ialu_reg );
8398%}
8399
8400// Or Register with Memory
// dst = dst | [src]: folds the LoadI into the OR as a memory source operand.
// Costed at 125 so the register-register form is preferred when available.
8401instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8402  match(Set dst (OrI dst (LoadI src)));
8403  effect(KILL cr);
8404
8405  ins_cost(125);
8406  format %{ "OR     $dst,$src" %}
8407  opcode(0x0B);
8408  ins_encode( OpcP, RegMem( dst, src) );
8409  ins_pipe( ialu_reg_mem );
8410%}
8411
8412// Or Memory with Register
// [dst] = [dst] | src: read-modify-write on memory. The match rule requires
// the load and the store to use the same memory operand (dst).
// Opcode 0x09 is the "r/m32, r32" direction, hence RegMem( src, dst ).
8413instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8414  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8415  effect(KILL cr);
8416
8417  ins_cost(150);
8418  format %{ "OR     $dst,$src" %}
8419  opcode(0x09);  /* Opcode 09 /r */
8420  ins_encode( OpcP, RegMem( src, dst ) );
8421  ins_pipe( ialu_mem_reg );
8422%}
8423
8424// Or Memory with Immediate
// [dst] = [dst] | constant: read-modify-write on memory with an immediate.
// The secondary opcode (/1) is placed in the ModRM reg field by RMopc_Mem.
// NOTE(review): OpcSE/Con8or32 are defined outside this chunk; presumably they
// select the imm8 vs imm32 form based on the constant -- confirm.
8425instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8426  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8427  effect(KILL cr);
8428
8429  ins_cost(125);
8430  format %{ "OR     $dst,$src" %}
8431  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8432  // ins_encode( MemImm( dst, src) );
8433  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8434  ins_pipe( ialu_mem_imm );
8435%}
8436
8437// ROL/ROR
8438// ROL expand
// Expansion target only (no match rule): rotate dst left by one bit.
// Emitted through the expand block of rolI_eReg_i1. The D1 /0 form has an
// implicit count of 1, so only dst is encoded; shift is listed as USE only.
8439instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8440  effect(USE_DEF dst, USE shift, KILL cr);
8441
8442  format %{ "ROL    $dst, $shift" %}
8443  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8444  ins_encode( OpcP, RegOpc( dst ));
8445  ins_pipe( ialu_reg );
8446%}
8447
// Expansion target only (no match rule): rotate dst left by an 8-bit
// immediate count. Emitted through the expand block of rolI_eReg_i8.
8448instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8449  effect(USE_DEF dst, USE shift, KILL cr);
8450
8451  format %{ "ROL    $dst, $shift" %}
8452  opcode(0xC1, 0x0); /* Opcode C1 /0 ib */
8453  ins_encode( RegOpcImm(dst, shift) );
8454  ins_pipe(ialu_reg);
8455%}
8456
// Expansion target only (no match rule): rotate dst left by a run-time count.
// Emitted through the expand blocks of rolI_eReg_Var_C0 and rolI_eReg_Var_C32.
// The D3 /0 form takes its count implicitly from CL, so shift is pinned to
// eCXRegI. NOTE(review): ncxRegI presumably excludes ECX so dst cannot alias
// the count register -- confirm against the register class definitions.
8457instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8458  effect(USE_DEF dst, USE shift, KILL cr);
8459
8460  format %{ "ROL    $dst, $shift" %}
8461  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8462  ins_encode(OpcP, RegOpc(dst));
8463  ins_pipe( ialu_reg_reg );
8464%}
8465// end of ROL expand
8466
8467// ROL 32bit by one once
// Recognizes the rotate-left-by-one idiom (dst << 1) | (dst >>> -1) and
// expands it to a single ROL. Only the left-shift count is forwarded.
// NOTE(review): relies on int shift counts being taken modulo 32, so that a
// count of -1 behaves as 31 -- confirm immI_M1 is the constant -1.
8468instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8469  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8470
8471  expand %{
8472    rolI_eReg_imm1(dst, lshift, cr);
8473  %}
8474%}
8475
8476// ROL 32bit var by imm8 once
// Recognizes (dst << lshift) | (dst >>> rshift) as a rotate left by lshift.
// The predicate reads both shift constants from the matched OrI node
// (in(1) is the LShiftI, in(2) the URShiftI; in(2) of each is its count) and
// accepts the pattern only when the counts sum to a multiple of 32.
8477instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8478  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8479  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8480
8481  expand %{
8482    rolI_eReg_imm8(dst, lshift, cr);
8483  %}
8484%}
8485
8486// ROL 32bit var by var once
// Recognizes (dst << shift) | (dst >>> (0 - shift)) with a run-time shift and
// expands it to ROL by CL. The zero operand exists only to match the constant
// in the SubI; it is not forwarded to the expansion.
8487instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8488  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8489
8490  expand %{
8491    rolI_eReg_CL(dst, shift, cr);
8492  %}
8493%}
8494
8495// ROL 32bit var by var once
// Recognizes (dst << shift) | (dst >>> (32 - shift)) with a run-time shift and
// expands it to ROL by CL. The c32 operand exists only to match the constant
// in the SubI; it is not forwarded to the expansion.
8496instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8497  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8498
8499  expand %{
8500    rolI_eReg_CL(dst, shift, cr);
8501  %}
8502%}
8503
8504// ROR expand
// Expansion target only (no match rule): rotate dst right by one bit.
// Emitted through the expand block of rorI_eReg_i1. The D1 /1 form has an
// implicit count of 1, so only dst is encoded; shift is listed as USE only.
8505instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8506  effect(USE_DEF dst, USE shift, KILL cr);
8507
8508  format %{ "ROR    $dst, $shift" %}
8509  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8510  ins_encode( OpcP, RegOpc( dst ) );
8511  ins_pipe( ialu_reg );
8512%}
8513
// Expansion target only (no match rule): rotate dst right by an 8-bit
// immediate count. Emitted through the expand block of rorI_eReg_i8.
8514instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8515  effect (USE_DEF dst, USE shift, KILL cr);
8516
8517  format %{ "ROR    $dst, $shift" %}
8518  opcode(0xC1, 0x1); /* Opcode C1 /1 ib */
8519  ins_encode( RegOpcImm(dst, shift) );
8520  ins_pipe( ialu_reg );
8521%}
8522
// Expansion target only (no match rule): rotate dst right by a run-time count.
// Emitted through the expand blocks of rorI_eReg_Var_C0 and rorI_eReg_Var_C32.
// The D3 /1 form takes its count implicitly from CL, so shift is pinned to
// eCXRegI. NOTE(review): ncxRegI presumably excludes ECX so dst cannot alias
// the count register -- confirm against the register class definitions.
8523instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8524  effect(USE_DEF dst, USE shift, KILL cr);
8525
8526  format %{ "ROR    $dst, $shift" %}
8527  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8528  ins_encode(OpcP, RegOpc(dst));
8529  ins_pipe( ialu_reg_reg );
8530%}
8531// end of ROR expand
8532
8533// ROR right once
// Recognizes the rotate-right-by-one idiom (dst >>> 1) | (dst << -1) and
// expands it to a single ROR. Only the right-shift count is forwarded.
// NOTE(review): relies on int shift counts being taken modulo 32, so that a
// count of -1 behaves as 31 -- confirm immI_M1 is the constant -1.
8534instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8535  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8536
8537  expand %{
8538    rorI_eReg_imm1(dst, rshift, cr);
8539  %}
8540%}
8541
8542// ROR 32bit by immI8 once
// Recognizes (dst >>> rshift) | (dst << lshift) as a rotate right by rshift.
// The predicate reads both shift constants from the matched OrI node
// (in(1) is the URShiftI, in(2) the LShiftI; in(2) of each is its count) and
// accepts the pattern only when the counts sum to a multiple of 32.
8543instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8544  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8545  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8546
8547  expand %{
8548    rorI_eReg_imm8(dst, rshift, cr);
8549  %}
8550%}
8551
8552// ROR 32bit var by var once
// Recognizes (dst >>> shift) | (dst << (0 - shift)) with a run-time shift and
// expands it to ROR by CL. The zero operand exists only to match the constant
// in the SubI; it is not forwarded to the expansion.
8553instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8554  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8555
8556  expand %{
8557    rorI_eReg_CL(dst, shift, cr);
8558  %}
8559%}
8560
8561// ROR 32bit var by var once
// Recognizes (dst >>> shift) | (dst << (32 - shift)) with a run-time shift and
// expands it to ROR by CL. The c32 operand exists only to match the constant
// in the SubI; it is not forwarded to the expansion.
8562instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8563  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8564
8565  expand %{
8566    rorI_eReg_CL(dst, shift, cr);
8567  %}
8568%}
8569
8570// Xor Instructions
8571// Xor Register with Register
// dst = dst ^ src, both in registers. Two-byte encoding: opcode 0x33 plus a
// ModRM byte. Flags are clobbered (KILL cr).
8572instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8573  match(Set dst (XorI dst src));
8574  effect(KILL cr);
8575
8576  size(2);
8577  format %{ "XOR    $dst,$src" %}
8578  opcode(0x33);
8579  ins_encode( OpcP, RegReg( dst, src) );
8580  ins_pipe( ialu_reg_reg );
8581%}
8582
8583// Xor Register with Immediate -1
// dst = dst ^ -1, emitted as a two-byte NOT. Unlike the other XOR forms this
// rule has no eFlagsReg operand and no KILL: the x86 NOT instruction leaves
// the flags untouched.
8584instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8585  match(Set dst (XorI dst imm));
8586
8587  size(2);
8588  format %{ "NOT    $dst" %}
8589  ins_encode %{
8590     __ notl($dst$$Register);
8591  %}
8592  ins_pipe( ialu_reg );
8593%}
8594
8595// Xor Register with Immediate
// dst = dst ^ constant. No size() is declared, so the length is not fixed here.
// NOTE(review): OpcSErm/Con8or32 are defined outside this chunk; presumably
// they select the imm8 vs imm32 form based on the constant -- confirm.
8596instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8597  match(Set dst (XorI dst src));
8598  effect(KILL cr);
8599
8600  format %{ "XOR    $dst,$src" %}
8601  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8602  // ins_encode( RegImm( dst, src) );
8603  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8604  ins_pipe( ialu_reg );
8605%}
8606
8607// Xor Register with Memory
// dst = dst ^ [src]: folds the LoadI into the XOR as a memory source operand.
// Costed at 125 so the register-register form is preferred when available.
8608instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8609  match(Set dst (XorI dst (LoadI src)));
8610  effect(KILL cr);
8611
8612  ins_cost(125);
8613  format %{ "XOR    $dst,$src" %}
8614  opcode(0x33);
8615  ins_encode( OpcP, RegMem(dst, src) );
8616  ins_pipe( ialu_reg_mem );
8617%}
8618
8619// Xor Memory with Register
// [dst] = [dst] ^ src: read-modify-write on memory. The match rule requires
// the load and the store to use the same memory operand (dst).
// Opcode 0x31 is the "r/m32, r32" direction, hence RegMem( src, dst ).
8620instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8621  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8622  effect(KILL cr);
8623
8624  ins_cost(150);
8625  format %{ "XOR    $dst,$src" %}
8626  opcode(0x31);  /* Opcode 31 /r */
8627  ins_encode( OpcP, RegMem( src, dst ) );
8628  ins_pipe( ialu_mem_reg );
8629%}
8630
8631// Xor Memory with Immediate
// [dst] = [dst] ^ constant: read-modify-write on memory with an immediate.
// The secondary opcode (/6) is placed in the ModRM reg field by RMopc_Mem.
// NOTE(review): OpcSE/Con8or32 are defined outside this chunk; presumably they
// select the imm8 vs imm32 form based on the constant -- confirm.
8632instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8633  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8634  effect(KILL cr);
8635
8636  ins_cost(125);
8637  format %{ "XOR    $dst,$src" %}
8638  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8639  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8640  ins_pipe( ialu_mem_imm );
8641%}
8642
8643//----------Convert Int to Boolean---------------------------------------------
8644
// Expansion target only (no match rule): plain register move dst = src.
// First step of the convI2B expansion; dst is a pure DEF here.
8645instruct movI_nocopy(rRegI dst, rRegI src) %{
8646  effect( DEF dst, USE src );
8647  format %{ "MOV    $dst,$src" %}
8648  ins_encode( enc_Copy( dst, src) );
8649  ins_pipe( ialu_reg_reg );
8650%}
8651
// Expansion target only (no match rule): second step of convI2B.
// Entered with dst == src (set up by movI_nocopy). NEG leaves dst = -src and
// sets the carry flag exactly when src != 0; ADC then computes
// -src + src + CF, so dst ends up 1 for a non-zero src and 0 otherwise.
8652instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8653  effect( USE_DEF dst, USE src, KILL cr );
8654
8655  size(4);
8656  format %{ "NEG    $dst\n\t"
8657            "ADC    $dst,$src" %}
8658  ins_encode( neg_reg(dst),
8659              OpcRegReg(0x13,dst,src) );
8660  ins_pipe( ialu_reg_reg_long );
8661%}
8662
// Conv2B for an int source: dst = (src != 0) ? 1 : 0, branch-free.
// Expanded into a register copy followed by the NEG/ADC pair in ci2b.
8663instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8664  match(Set dst (Conv2B src));
8665
8666  expand %{
8667    movI_nocopy(dst,src);
8668    ci2b(dst,src,cr);
8669  %}
8670%}
8671
// Expansion target only (no match rule): moves a pointer register into an
// int register. First step of the convP2B expansion; dst is a pure DEF here.
8672instruct movP_nocopy(rRegI dst, eRegP src) %{
8673  effect( DEF dst, USE src );
8674  format %{ "MOV    $dst,$src" %}
8675  ins_encode( enc_Copy( dst, src) );
8676  ins_pipe( ialu_reg_reg );
8677%}
8678
// Expansion target only (no match rule): second step of convP2B.
// Entered with dst == src (set up by movP_nocopy). NEG sets the carry flag
// exactly when src is non-null; ADC then computes -src + src + CF, so dst
// ends up 1 for a non-null pointer and 0 for null.
// Same NEG/ADC encoding as ci2b, but no size() is declared here.
8679instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8680  effect( USE_DEF dst, USE src, KILL cr );
8681  format %{ "NEG    $dst\n\t"
8682            "ADC    $dst,$src" %}
8683  ins_encode( neg_reg(dst),
8684              OpcRegReg(0x13,dst,src) );
8685  ins_pipe( ialu_reg_reg_long );
8686%}
8687
// Conv2B for a pointer source: dst = (src != NULL) ? 1 : 0, branch-free.
// Expanded into a register copy followed by the NEG/ADC pair in cp2b.
8688instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8689  match(Set dst (Conv2B src));
8690
8691  expand %{
8692    movP_nocopy(dst,src);
8693    cp2b(dst,src,cr);
8694  %}
8695%}
8696
// dst = (p < q) ? -1 : 0 using a signed compare, computed without a branch:
// zero dst, compare p with q, SETlt writes 0/1 into dst's low byte, and NEG
// turns the 1 into an all-ones mask.
// dst is zeroed before p and q are read, so it must not share a register with
// either of them. NOTE(review): dst is pinned to eCXRegI and p/q are ncxRegI,
// which presumably guarantees that -- confirm against the register classes.
8697instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8698  match(Set dst (CmpLTMask p q));
8699  effect(KILL cr);
8700  ins_cost(400);
8701
8702  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8703  format %{ "XOR    $dst,$dst\n\t"
8704            "CMP    $p,$q\n\t"
8705            "SETlt  $dst\n\t"
8706            "NEG    $dst" %}
8707  ins_encode %{
8708    Register Rp = $p$$Register;
8709    Register Rq = $q$$Register;
8710    Register Rd = $dst$$Register;
    // Straight-line sequence: no label is needed because nothing branches.
8712    __ xorl(Rd, Rd);
8713    __ cmpl(Rp, Rq);
8714    __ setb(Assembler::less, Rd);
8715    __ negl(Rd);
8716  %}
8717
8718  ins_pipe(pipe_slow);
8719%}
8720
// Special case of CmpLTMask against zero: dst = (dst < 0) ? -1 : 0.
// An arithmetic right shift by 31 replicates the sign bit across the whole
// register, which is exactly that mask. Much cheaper (cost 100) than the
// general cmpLTMask sequence (cost 400).
8721instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8722  match(Set dst (CmpLTMask dst zero));
8723  effect(DEF dst, KILL cr);
8724  ins_cost(100);
8725
8726  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8727  ins_encode %{
8728  __ sarl($dst$$Register, 31);
8729  %}
8730  ins_pipe(ialu_reg);
8731%}
8732
8733/* better to save a register than avoid a branch */
// Fused pattern: p = (p - q) + ((p < q) ? y : 0).
// Implemented with a short branch instead of materializing the mask:
// SUB sets the flags for the signed p/q comparison, and y is added only when
// the JGE is not taken (i.e. when p < q).
8734instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8735  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8736  effect(KILL cr);
8737  ins_cost(400);
8738  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8739            "JGE    done\n\t"
8740            "ADD    $p,$y\n"
8741            "done:  " %}
8742  ins_encode %{
8743    Register Rp = $p$$Register;
8744    Register Rq = $q$$Register;
8745    Register Ry = $y$$Register;
8746    Label done;
8747    __ subl(Rp, Rq);
    // Skip the add when p >= q (mask would have been zero).
8748    __ jccb(Assembler::greaterEqual, done);
8749    __ addl(Rp, Ry);
8750    __ bind(done);
8751  %}
8752
8753  ins_pipe(pipe_cmplt);
8754%}
8755
8756/* better to save a register than avoid a branch */
// Fused pattern: y = (p < q) ? y : 0.
// Implemented with a short branch instead of materializing the mask:
// y is left untouched when p < q and cleared with XOR otherwise.
8757instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8758  match(Set y (AndI (CmpLTMask p q) y));
8759  effect(KILL cr);
8760
8761  ins_cost(300);
8762
8763  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8764            "JLT      done\n\t"
8765            "XORL     $y, $y\n"
8766            "done:  " %}
8767  ins_encode %{
8768    Register Rp = $p$$Register;
8769    Register Rq = $q$$Register;
8770    Register Ry = $y$$Register;
8771    Label done;
8772    __ cmpl(Rp, Rq);
    // Keep y when p < q (mask would have been all ones).
8773    __ jccb(Assembler::less, done);
8774    __ xorl(Ry, Ry);
8775    __ bind(done);
8776  %}
8777
8778  ins_pipe(pipe_cmplt);
8779%}
8780
8781/* If I enable this, I encourage spilling in the inner loop of compress.
8782instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8783  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8784*/
8785//----------Overflow Math Instructions-----------------------------------------
8786
// Produces the flags for an int add-overflow check (OverflowAddI).
// The rule defines only cr: it performs a real ADD into op1 to obtain the
// flags, which destroys op1, hence USE_KILL op1.
8787instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8788%{
8789  match(Set cr (OverflowAddI op1 op2));
8790  effect(DEF cr, USE_KILL op1, USE op2);
8791
8792  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8793
8794  ins_encode %{
8795    __ addl($op1$$Register, $op2$$Register);
8796  %}
8797  ins_pipe(ialu_reg_reg);
8798%}
8799
// Produces the flags for an int add-overflow check against a constant.
// Performs a real ADD of the immediate into op1 to obtain the flags, which
// destroys op1, hence USE_KILL op1.
8800instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8801%{
8802  match(Set cr (OverflowAddI op1 op2));
8803  effect(DEF cr, USE_KILL op1, USE op2);
8804
8805  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8806
8807  ins_encode %{
8808    __ addl($op1$$Register, $op2$$constant);
8809  %}
8810  ins_pipe(ialu_reg_reg);
8811%}
8812
// Produces the flags for an int subtract-overflow check (OverflowSubI).
// CMP computes op1 - op2 for the flags only, so neither operand is modified
// and no USE_KILL effect is needed.
8813instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8814%{
8815  match(Set cr (OverflowSubI op1 op2));
8816
8817  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8818  ins_encode %{
8819    __ cmpl($op1$$Register, $op2$$Register);
8820  %}
8821  ins_pipe(ialu_reg_reg);
8822%}
8823
// Produces the flags for an int subtract-overflow check against a constant.
// CMP computes op1 - op2 for the flags only, so op1 is not modified.
8824instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8825%{
8826  match(Set cr (OverflowSubI op1 op2));
8827
8828  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8829  ins_encode %{
8830    __ cmpl($op1$$Register, $op2$$constant);
8831  %}
8832  ins_pipe(ialu_reg_reg);
8833%}
8834
// Produces the flags for an int negate-overflow check, matched as
// OverflowSubI(0, op2). Performs a real NEG on op2 to obtain the flags, which
// destroys op2, hence USE_KILL op2. The zero operand only matches the constant.
8835instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8836%{
8837  match(Set cr (OverflowSubI zero op2));
8838  effect(DEF cr, USE_KILL op2);
8839
8840  format %{ "NEG    $op2\t# overflow check int" %}
8841  ins_encode %{
8842    __ negl($op2$$Register);
8843  %}
8844  ins_pipe(ialu_reg_reg);
8845%}
8846
// Produces the flags for an int multiply-overflow check (OverflowMulI).
// Performs a real two-operand IMUL into op1 to obtain the flags, which
// destroys op1, hence USE_KILL op1.
8847instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8848%{
8849  match(Set cr (OverflowMulI op1 op2));
8850  effect(DEF cr, USE_KILL op1, USE op2);
8851
8852  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8853  ins_encode %{
8854    __ imull($op1$$Register, $op2$$Register);
8855  %}
8856  ins_pipe(ialu_reg_reg_alu0);
8857%}
8858
// Produces the flags for an int multiply-overflow check against a constant.
// Uses the three-operand IMUL, writing the product into the scratch register
// tmp, so op1 is only read (USE) and survives the check.
8859instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8860%{
8861  match(Set cr (OverflowMulI op1 op2));
8862  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8863
8864  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8865  ins_encode %{
8866    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8867  %}
8868  ins_pipe(ialu_reg_reg_alu0);
8869%}
8870
8871//----------Long Instructions------------------------------------------------
8872// Add Long Register with Register
// 64-bit dst += src on a register pair: ADD (0x03) on the low halves, then
// ADC (0x13) on the high halves so the carry propagates.
8873instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8874  match(Set dst (AddL dst src));
8875  effect(KILL cr);
8876  ins_cost(200);
8877  format %{ "ADD    $dst.lo,$src.lo\n\t"
8878            "ADC    $dst.hi,$src.hi" %}
8879  opcode(0x03, 0x13);
8880  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8881  ins_pipe( ialu_reg_reg_long );
8882%}
8883
8884// Add Long Register with Immediate
// 64-bit dst += constant: ADD (81 /0) on the low half, ADC (81 /2) on the
// high half so the carry propagates.
// NOTE(review): Long_OpcSErm_Lo/Hi are defined outside this chunk; presumably
// they split the 64-bit constant into halves and pick imm8/imm32 forms -- confirm.
8885instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8886  match(Set dst (AddL dst src));
8887  effect(KILL cr);
8888  format %{ "ADD    $dst.lo,$src.lo\n\t"
8889            "ADC    $dst.hi,$src.hi" %}
8890  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8891  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8892  ins_pipe( ialu_reg_long );
8893%}
8894
8895// Add Long Register with Memory
// 64-bit dst += [mem]: ADD the low word at mem, then ADC the high word at
// mem+4 (per the format string) so the carry propagates.
8896instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8897  match(Set dst (AddL dst (LoadL mem)));
8898  effect(KILL cr);
8899  ins_cost(125);
8900  format %{ "ADD    $dst.lo,$mem\n\t"
8901            "ADC    $dst.hi,$mem+4" %}
8902  opcode(0x03, 0x13);
8903  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8904  ins_pipe( ialu_reg_long_mem );
8905%}
8906
8907// Subtract Long Register with Register.
// 64-bit dst -= src on a register pair: SUB (0x2B) on the low halves, then
// SBB (0x1B) on the high halves so the borrow propagates.
8908instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8909  match(Set dst (SubL dst src));
8910  effect(KILL cr);
8911  ins_cost(200);
8912  format %{ "SUB    $dst.lo,$src.lo\n\t"
8913            "SBB    $dst.hi,$src.hi" %}
8914  opcode(0x2B, 0x1B);
8915  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8916  ins_pipe( ialu_reg_reg_long );
8917%}
8918
8919// Subtract Long Register with Immediate
// 64-bit dst -= constant: SUB (81 /5) on the low half, SBB (81 /3) on the
// high half so the borrow propagates.
// NOTE(review): Long_OpcSErm_Lo/Hi are defined outside this chunk; presumably
// they split the 64-bit constant into halves and pick imm8/imm32 forms -- confirm.
8920instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8921  match(Set dst (SubL dst src));
8922  effect(KILL cr);
8923  format %{ "SUB    $dst.lo,$src.lo\n\t"
8924            "SBB    $dst.hi,$src.hi" %}
8925  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8926  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8927  ins_pipe( ialu_reg_long );
8928%}
8929
8930// Subtract Long Register with Memory
// 64-bit dst -= [mem]: SUB the low word at mem, then SBB the high word at
// mem+4 (per the format string) so the borrow propagates.
8931instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8932  match(Set dst (SubL dst (LoadL mem)));
8933  effect(KILL cr);
8934  ins_cost(125);
8935  format %{ "SUB    $dst.lo,$mem\n\t"
8936            "SBB    $dst.hi,$mem+4" %}
8937  opcode(0x2B, 0x1B);
8938  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8939  ins_pipe( ialu_reg_long_mem );
8940%}
8941
// 64-bit negate, matched as (0 - dst). Per the format string: negate the high
// half, negate the low half, then subtract the low half's borrow from the
// high half. The actual bytes come from the neg_long encoding class, which is
// defined outside this chunk.
8942instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8943  match(Set dst (SubL zero dst));
8944  effect(KILL cr);
8945  ins_cost(300);
8946  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8947  ins_encode( neg_long(dst) );
8948  ins_pipe( ialu_reg_reg_long );
8949%}
8950
8951// And Long Register with Register
// 64-bit dst &= src on a register pair: two independent 32-bit ANDs (0x23),
// one per half; no carry is involved.
8952instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8953  match(Set dst (AndL dst src));
8954  effect(KILL cr);
8955  format %{ "AND    $dst.lo,$src.lo\n\t"
8956            "AND    $dst.hi,$src.hi" %}
8957  opcode(0x23,0x23);
8958  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8959  ins_pipe( ialu_reg_reg_long );
8960%}
8961
8962// And Long Register with Immediate
// 64-bit dst &= constant: AND (81 /4) applied to each half with the matching
// half of the constant.
// NOTE(review): Long_OpcSErm_Lo/Hi are defined outside this chunk; presumably
// they split the 64-bit constant into halves and pick imm8/imm32 forms -- confirm.
8963instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8964  match(Set dst (AndL dst src));
8965  effect(KILL cr);
8966  format %{ "AND    $dst.lo,$src.lo\n\t"
8967            "AND    $dst.hi,$src.hi" %}
8968  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8969  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8970  ins_pipe( ialu_reg_long );
8971%}
8972
8973// And Long Register with Memory
// 64-bit dst &= [mem]: AND the low half with the word at mem and the high
// half with the word at mem+4 (per the format string).
8974instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8975  match(Set dst (AndL dst (LoadL mem)));
8976  effect(KILL cr);
8977  ins_cost(125);
8978  format %{ "AND    $dst.lo,$mem\n\t"
8979            "AND    $dst.hi,$mem+4" %}
8980  opcode(0x23, 0x23);
8981  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8982  ins_pipe( ialu_reg_long_mem );
8983%}
8984
8985// BMI1 instructions
// 64-bit dst = ~src1 & src2 using one 32-bit ANDN per half.
// dst is declared TEMP as well as being the result: the low half of dst is
// written before the high halves of the sources are read, so dst must not
// share registers with src1 or src2.
// Selected only when UseBMI1Instructions is set.
8986instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8987  match(Set dst (AndL (XorL src1 minus_1) src2));
8988  predicate(UseBMI1Instructions);
8989  effect(KILL cr, TEMP dst);
8990
8991  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8992            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8993         %}
8994
8995  ins_encode %{
8996    Register Rdst = $dst$$Register;
8997    Register Rsrc1 = $src1$$Register;
8998    Register Rsrc2 = $src2$$Register;
8999    __ andnl(Rdst, Rsrc1, Rsrc2);
9000    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
9001  %}
9002  ins_pipe(ialu_reg_reg_long);
9003%}
9004
// 64-bit dst = ~src1 & [src2] using one 32-bit ANDN per half; the high word
// is read from the same address with the displacement advanced by 4.
// Selected only when UseBMI1Instructions is set.
// NOTE(review): this rule takes a generic `memory` operand and rebuilds the
// high-word address with relocInfo::none, whereas the other long-memory rules
// in this file (e.g. addL_eReg_mem) use `load_long_memory` -- confirm that
// every addressing mode `memory` admits is safe to offset this way.
9005instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
9006  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
9007  predicate(UseBMI1Instructions);
9008  effect(KILL cr, TEMP dst);
9009
9010  ins_cost(125);
9011  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
9012            "ANDNL  $dst.hi, $src1.hi, $src2+4"
9013         %}
9014
9015  ins_encode %{
9016    Register Rdst = $dst$$Register;
9017    Register Rsrc1 = $src1$$Register;
9018    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
9019
9020    __ andnl(Rdst, Rsrc1, $src2$$Address);
9021    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
9022  %}
9023  ins_pipe(ialu_reg_mem);
9024%}
9025
// 64-bit dst = src & -src (isolate the lowest set bit) built from 32-bit BLSI.
// The high half is pre-set to 0. If BLSI on the low half yields a non-zero
// result, the lowest set bit lives in the low word and we are done; otherwise
// the low word of src was zero and the bit is isolated from the high word.
// Selected only when UseBMI1Instructions is set.
9026instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
9027  match(Set dst (AndL (SubL imm_zero src) src));
9028  predicate(UseBMI1Instructions);
9029  effect(KILL cr, TEMP dst);
9030
9031  format %{ "MOVL   $dst.hi, 0\n\t"
9032            "BLSIL  $dst.lo, $src.lo\n\t"
9033            "JNZ    done\n\t"
9034            "BLSIL  $dst.hi, $src.hi\n"
9035            "done:"
9036         %}
9037
9038  ins_encode %{
9039    Label done;
9040    Register Rdst = $dst$$Register;
9041    Register Rsrc = $src$$Register;
    // MOV does not alter flags, but it is issued first anyway so the branch
    // below tests the flags produced by BLSI.
9042    __ movl(HIGH_FROM_LOW(Rdst), 0);
9043    __ blsil(Rdst, Rsrc);
9044    __ jccb(Assembler::notZero, done);
9045    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9046    __ bind(done);
9047  %}
9048  ins_pipe(ialu_reg);
9049%}
9050
// 64-bit dst = [src] & -[src] built from 32-bit BLSI on memory operands.
// Same scheme as the register form: zero the high half, BLSI the low word,
// and only if that result is zero BLSI the high word (read from src+4).
// Selected only when UseBMI1Instructions is set.
// NOTE(review): takes a generic `memory` operand and offsets it by 4 with
// relocInfo::none, unlike the `load_long_memory` rules in this file -- confirm.
9051instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
9052  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
9053  predicate(UseBMI1Instructions);
9054  effect(KILL cr, TEMP dst);
9055
9056  ins_cost(125);
9057  format %{ "MOVL   $dst.hi, 0\n\t"
9058            "BLSIL  $dst.lo, $src\n\t"
9059            "JNZ    done\n\t"
9060            "BLSIL  $dst.hi, $src+4\n"
9061            "done:"
9062         %}
9063
9064  ins_encode %{
9065    Label done;
9066    Register Rdst = $dst$$Register;
9067    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9068
9069    __ movl(HIGH_FROM_LOW(Rdst), 0);
9070    __ blsil(Rdst, $src$$Address);
9071    __ jccb(Assembler::notZero, done);
9072    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
9073    __ bind(done);
9074  %}
9075  ins_pipe(ialu_reg_mem);
9076%}
9077
// 64-bit dst = (src - 1) ^ src built from 32-bit BLSMSK.
// The high half is pre-set to 0. BLSMSK sets the carry flag when its source
// is zero; if carry is clear the mask ends inside the low word and we are
// done, otherwise the low result is all ones and the mask continues into the
// high word. Selected only when UseBMI1Instructions is set.
9078instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9079%{
9080  match(Set dst (XorL (AddL src minus_1) src));
9081  predicate(UseBMI1Instructions);
9082  effect(KILL cr, TEMP dst);
9083
9084  format %{ "MOVL    $dst.hi, 0\n\t"
9085            "BLSMSKL $dst.lo, $src.lo\n\t"
9086            "JNC     done\n\t"
9087            "BLSMSKL $dst.hi, $src.hi\n"
9088            "done:"
9089         %}
9090
9091  ins_encode %{
9092    Label done;
9093    Register Rdst = $dst$$Register;
9094    Register Rsrc = $src$$Register;
9095    __ movl(HIGH_FROM_LOW(Rdst), 0);
9096    __ blsmskl(Rdst, Rsrc);
9097    __ jccb(Assembler::carryClear, done);
9098    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9099    __ bind(done);
9100  %}
9101
9102  ins_pipe(ialu_reg);
9103%}
9104
// 64-bit dst = ([src] - 1) ^ [src] built from 32-bit BLSMSK on memory operands.
// Same scheme as the register form: zero the high half, BLSMSK the low word,
// and only if carry is set (low word was zero) BLSMSK the high word at src+4.
// Selected only when UseBMI1Instructions is set.
// NOTE(review): takes a generic `memory` operand and offsets it by 4 with
// relocInfo::none, unlike the `load_long_memory` rules in this file -- confirm.
9105instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9106%{
9107  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
9108  predicate(UseBMI1Instructions);
9109  effect(KILL cr, TEMP dst);
9110
9111  ins_cost(125);
9112  format %{ "MOVL    $dst.hi, 0\n\t"
9113            "BLSMSKL $dst.lo, $src\n\t"
9114            "JNC     done\n\t"
9115            "BLSMSKL $dst.hi, $src+4\n"
9116            "done:"
9117         %}
9118
9119  ins_encode %{
9120    Label done;
9121    Register Rdst = $dst$$Register;
9122    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9123
9124    __ movl(HIGH_FROM_LOW(Rdst), 0);
9125    __ blsmskl(Rdst, $src$$Address);
9126    __ jccb(Assembler::carryClear, done);
9127    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
9128    __ bind(done);
9129  %}
9130
9131  ins_pipe(ialu_reg_mem);
9132%}
9133
// 64-bit dst = (src - 1) & src (clear the lowest set bit) built from 32-bit BLSR.
// The high half is first copied unchanged from src. BLSR sets the carry flag
// when its source is zero; if carry is clear the bit was cleared in the low
// word and the copied high half is already correct, otherwise the low word
// was zero and the bit is cleared in the high word instead.
// Selected only when UseBMI1Instructions is set.
9134instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
9135%{
9136  match(Set dst (AndL (AddL src minus_1) src) );
9137  predicate(UseBMI1Instructions);
9138  effect(KILL cr, TEMP dst);
9139
9140  format %{ "MOVL   $dst.hi, $src.hi\n\t"
9141            "BLSRL  $dst.lo, $src.lo\n\t"
9142            "JNC    done\n\t"
9143            "BLSRL  $dst.hi, $src.hi\n"
9144            "done:"
9145  %}
9146
9147  ins_encode %{
9148    Label done;
9149    Register Rdst = $dst$$Register;
9150    Register Rsrc = $src$$Register;
9151    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9152    __ blsrl(Rdst, Rsrc);
9153    __ jccb(Assembler::carryClear, done);
9154    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
9155    __ bind(done);
9156  %}
9157
9158  ins_pipe(ialu_reg);
9159%}
9160
// 64-bit dst = ([src] - 1) & [src] built from 32-bit BLSR on memory operands.
// Same scheme as the register form: load the high word (src+4) into dst.hi,
// BLSR the low word, and only if carry is set (low word was zero) BLSR the
// high word. Selected only when UseBMI1Instructions is set.
// NOTE(review): takes a generic `memory` operand and offsets it by 4 with
// relocInfo::none, unlike the `load_long_memory` rules in this file -- confirm.
9161instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
9162%{
9163  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
9164  predicate(UseBMI1Instructions);
9165  effect(KILL cr, TEMP dst);
9166
9167  ins_cost(125);
9168  format %{ "MOVL   $dst.hi, $src+4\n\t"
9169            "BLSRL  $dst.lo, $src\n\t"
9170            "JNC    done\n\t"
9171            "BLSRL  $dst.hi, $src+4\n"
9172            "done:"
9173  %}
9174
9175  ins_encode %{
9176    Label done;
9177    Register Rdst = $dst$$Register;
9178    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
9179    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
9180    __ blsrl(Rdst, $src$$Address);
9181    __ jccb(Assembler::carryClear, done);
9182    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
9183    __ bind(done);
9184  %}
9185
9186  ins_pipe(ialu_reg_mem);
9187%}
9188
9189// Or Long Register with Register
// 64-bit dst |= src on a register pair: two independent 32-bit ORs (0x0B),
// one per half; no carry is involved.
9190instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9191  match(Set dst (OrL dst src));
9192  effect(KILL cr);
9193  format %{ "OR     $dst.lo,$src.lo\n\t"
9194            "OR     $dst.hi,$src.hi" %}
9195  opcode(0x0B,0x0B);
9196  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9197  ins_pipe( ialu_reg_reg_long );
9198%}
9199
9200// Or Long Register with Immediate
// 64-bit dst |= constant: OR (81 /1) applied to each half with the matching
// half of the constant.
// NOTE(review): Long_OpcSErm_Lo/Hi are defined outside this chunk; presumably
// they split the 64-bit constant into halves and pick imm8/imm32 forms -- confirm.
9201instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9202  match(Set dst (OrL dst src));
9203  effect(KILL cr);
9204  format %{ "OR     $dst.lo,$src.lo\n\t"
9205            "OR     $dst.hi,$src.hi" %}
9206  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9207  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9208  ins_pipe( ialu_reg_long );
9209%}
9210
9211// Or Long Register with Memory
// 64-bit dst |= [mem]: OR the low half with the word at mem and the high
// half with the word at mem+4 (per the format string).
9212instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9213  match(Set dst (OrL dst (LoadL mem)));
9214  effect(KILL cr);
9215  ins_cost(125);
9216  format %{ "OR     $dst.lo,$mem\n\t"
9217            "OR     $dst.hi,$mem+4" %}
9218  opcode(0x0B,0x0B);
9219  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9220  ins_pipe( ialu_reg_long_mem );
9221%}
9222
9223// Xor Long Register with Register: dst ^= src on a 64-bit value, one 32-bit XOR per half (see format)
9224instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9225  match(Set dst (XorL dst src));  // two-address form: dst is both an input and the result
9226  effect(KILL cr);  // the flags register operand is declared clobbered
9227  format %{ "XOR    $dst.lo,$src.lo\n\t"
9228            "XOR    $dst.hi,$src.hi" %}
9229  opcode(0x33,0x33);  // 0x33 = XOR r32,r/m32; presumably one opcode per half (RegReg_Lo/RegReg_Hi are defined elsewhere)
9230  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9231  ins_pipe( ialu_reg_reg_long );
9232%}
9233
9234// Xor Long Register with Immediate -1: XOR with all-ones is a bitwise complement, so NOT is emitted for each half
9235instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9236  match(Set dst (XorL dst imm));  // note: no flags operand and no KILL effect are declared for this rule
9237  format %{ "NOT    $dst.lo\n\t"
9238            "NOT    $dst.hi" %}
9239  ins_encode %{
9240     __ notl($dst$$Register);  // low half
9241     __ notl(HIGH_FROM_LOW($dst$$Register));  // high half, register derived from the low one
9242  %}
9243  ins_pipe( ialu_reg_long );
9244%}
9245
9246// Xor Long Register with Immediate: dst ^= 64-bit constant, applied to the low and high halves separately (see format)
9247instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9248  match(Set dst (XorL dst src));  // two-address form: dst is both an input and the result
9249  effect(KILL cr);  // the flags register operand is declared clobbered
9250  format %{ "XOR    $dst.lo,$src.lo\n\t"
9251            "XOR    $dst.hi,$src.hi" %}
9252  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9253  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9254  ins_pipe( ialu_reg_long );
9255%}
9256
9257// Xor Long Register with Memory: dst ^= 64-bit value loaded from mem; the high half is read at $mem+4 (see format)
9258instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9259  match(Set dst (XorL dst (LoadL mem)));  // the LoadL is folded into the XOR instructions
9260  effect(KILL cr);  // the flags register operand is declared clobbered
9261  ins_cost(125);
9262  format %{ "XOR    $dst.lo,$mem\n\t"
9263            "XOR    $dst.hi,$mem+4" %}
9264  opcode(0x33,0x33);  // 0x33 = XOR r32,r/m32, emitted via OpcP (low half) and OpcS (high half)
9265  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9266  ins_pipe( ialu_reg_long_mem );
9267%}
9268
9269// Shift Left Long by 1: implemented as a 64-bit doubling with ADD (low half) + ADC (high half)
9270instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9271  predicate(UseNewLongLShift);  // only selected when the UseNewLongLShift flag is set
9272  match(Set dst (LShiftL dst cnt));
9273  effect(KILL cr);  // the flags register operand is declared clobbered
9274  ins_cost(100);  // cheaper than the generic 1-31 SHLD/SHL rule in this file (cost 200)
9275  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9276            "ADC    $dst.hi,$dst.hi" %}
9277  ins_encode %{
9278    __ addl($dst$$Register,$dst$$Register);  // lo += lo; the carry out is the bit shifted out of the low half
9279    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // hi += hi + carry
9280  %}
9281  ins_pipe( ialu_reg_long );
9282%}
9283
9284// Shift Left Long by 2: two successive 64-bit doublings, each an ADD (low half) + ADC (high half) pair
9285instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9286  predicate(UseNewLongLShift);  // only selected when the UseNewLongLShift flag is set
9287  match(Set dst (LShiftL dst cnt));
9288  effect(KILL cr);  // the flags register operand is declared clobbered
9289  ins_cost(100);  // cheaper than the generic 1-31 SHLD/SHL rule in this file (cost 200)
9290  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9291            "ADC    $dst.hi,$dst.hi\n\t"
9292            "ADD    $dst.lo,$dst.lo\n\t"
9293            "ADC    $dst.hi,$dst.hi" %}
9294  ins_encode %{
9295    __ addl($dst$$Register,$dst$$Register);  // first doubling: lo += lo
9296    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // hi += hi + carry
9297    __ addl($dst$$Register,$dst$$Register);  // second doubling
9298    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9299  %}
9300  ins_pipe( ialu_reg_long );
9301%}
9302
9303// Shift Left Long by 3: three successive 64-bit doublings, each an ADD (low half) + ADC (high half) pair
9304instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9305  predicate(UseNewLongLShift);  // only selected when the UseNewLongLShift flag is set
9306  match(Set dst (LShiftL dst cnt));
9307  effect(KILL cr);  // the flags register operand is declared clobbered
9308  ins_cost(100);  // cheaper than the generic 1-31 SHLD/SHL rule in this file (cost 200)
9309  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9310            "ADC    $dst.hi,$dst.hi\n\t"
9311            "ADD    $dst.lo,$dst.lo\n\t"
9312            "ADC    $dst.hi,$dst.hi\n\t"
9313            "ADD    $dst.lo,$dst.lo\n\t"
9314            "ADC    $dst.hi,$dst.hi" %}
9315  ins_encode %{
9316    __ addl($dst$$Register,$dst$$Register);  // first doubling: lo += lo
9317    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));  // hi += hi + carry
9318    __ addl($dst$$Register,$dst$$Register);  // second doubling
9319    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9320    __ addl($dst$$Register,$dst$$Register);  // third doubling
9321    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9322  %}
9323  ins_pipe( ialu_reg_long );
9324%}
9325
9326// Shift Left Long by 1-31: SHLD moves the top bits of the low half into the high half, then SHL shifts the low half
9327instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9328  match(Set dst (LShiftL dst cnt));
9329  effect(KILL cr);  // the flags register operand is declared clobbered
9330  ins_cost(200);
9331  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9332            "SHL    $dst.lo,$cnt" %}
9333  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9334  ins_encode( move_long_small_shift(dst,cnt) );  // encoding class defined elsewhere in this file
9335  ins_pipe( ialu_reg_long );
9336%}
9337
9338// Shift Left Long by 32-63: the low half becomes the high half (shifted by cnt-32) and the low half is zeroed
9339instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9340  match(Set dst (LShiftL dst cnt));
9341  effect(KILL cr);  // the flags register operand is declared clobbered
9342  ins_cost(300);
9343  format %{ "MOV    $dst.hi,$dst.lo\n"
9344          "\tSHL    $dst.hi,$cnt-32\n"
9345          "\tXOR    $dst.lo,$dst.lo" %}
9346  opcode(0xC1, 0x4);  /* C1 /4 ib */
9347  ins_encode( move_long_big_shift_clr(dst,cnt) );  // encoding class defined elsewhere in this file
9348  ins_pipe( ialu_reg_long );
9349%}
9350
9351// Shift Left Long by variable: bit 5 of the count selects a 32-bit pre-move, then SHLD/SHL finish the shift (see format)
9352instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9353  match(Set dst (LShiftL dst shift));  // shift count uses the eCXRegI operand class (presumably pinned to ECX/CL)
9354  effect(KILL cr);  // the flags register operand is declared clobbered
9355  ins_cost(500+200);
9356  size(17);  // declared size of the emitted sequence
9357  format %{ "TEST   $shift,32\n\t"
9358            "JEQ,s  small\n\t"
9359            "MOV    $dst.hi,$dst.lo\n\t"
9360            "XOR    $dst.lo,$dst.lo\n"
9361    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9362            "SHL    $dst.lo,$shift" %}
9363  ins_encode( shift_left_long( dst, shift ) );
9364  ins_pipe( pipe_slow );
9365%}
9366
9367// Shift Right Long (unsigned/logical, URShiftL) by 1-31: SHRD fills the low half from the high half, then SHR shifts the high half
9368instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9369  match(Set dst (URShiftL dst cnt));
9370  effect(KILL cr);  // the flags register operand is declared clobbered
9371  ins_cost(200);
9372  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9373            "SHR    $dst.hi,$cnt" %}
9374  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9375  ins_encode( move_long_small_shift(dst,cnt) );  // encoding class defined elsewhere in this file
9376  ins_pipe( ialu_reg_long );
9377%}
9378
9379// Shift Right Long (unsigned/logical, URShiftL) by 32-63: the high half becomes the low half (shifted by cnt-32) and the high half is zeroed
9380instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9381  match(Set dst (URShiftL dst cnt));
9382  effect(KILL cr);  // the flags register operand is declared clobbered
9383  ins_cost(300);
9384  format %{ "MOV    $dst.lo,$dst.hi\n"
9385          "\tSHR    $dst.lo,$cnt-32\n"
9386          "\tXOR    $dst.hi,$dst.hi" %}
9387  opcode(0xC1, 0x5);  /* C1 /5 ib */
9388  ins_encode( move_long_big_shift_clr(dst,cnt) );  // encoding class defined elsewhere in this file
9389  ins_pipe( ialu_reg_long );
9390%}
9391
9392// Shift Right Long (unsigned/logical, URShiftL) by variable: bit 5 of the count selects a 32-bit pre-move, then SHRD/SHR finish (see format)
9393instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9394  match(Set dst (URShiftL dst shift));  // shift count uses the eCXRegI operand class (presumably pinned to ECX/CL)
9395  effect(KILL cr);  // the flags register operand is declared clobbered
9396  ins_cost(600);
9397  size(17);  // declared size of the emitted sequence
9398  format %{ "TEST   $shift,32\n\t"
9399            "JEQ,s  small\n\t"
9400            "MOV    $dst.lo,$dst.hi\n\t"
9401            "XOR    $dst.hi,$dst.hi\n"
9402    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9403            "SHR    $dst.hi,$shift" %}
9404  ins_encode( shift_right_long( dst, shift ) );
9405  ins_pipe( pipe_slow );
9406%}
9407
9408// Shift Right Arithmetic Long (RShiftL) by 1-31: SHRD fills the low half from the high half, then SAR shifts the high half keeping the sign
9409instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9410  match(Set dst (RShiftL dst cnt));
9411  effect(KILL cr);  // the flags register operand is declared clobbered
9412  ins_cost(200);
9413  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9414            "SAR    $dst.hi,$cnt" %}
9415  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9416  ins_encode( move_long_small_shift(dst,cnt) );  // encoding class defined elsewhere in this file
9417  ins_pipe( ialu_reg_long );
9418%}
9419
9420// Shift Right Arithmetic Long (RShiftL) by 32-63: the high half becomes the low half (SAR by cnt-32); the high half is filled with the sign (SAR 31)
9421instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9422  match(Set dst (RShiftL dst cnt));
9423  effect(KILL cr);  // the flags register operand is declared clobbered
9424  ins_cost(300);
9425  format %{ "MOV    $dst.lo,$dst.hi\n"
9426          "\tSAR    $dst.lo,$cnt-32\n"
9427          "\tSAR    $dst.hi,31" %}
9428  opcode(0xC1, 0x7);  /* C1 /7 ib */
9429  ins_encode( move_long_big_shift_sign(dst,cnt) );  // encoding class defined elsewhere in this file
9430  ins_pipe( ialu_reg_long );
9431%}
9432
9433// Shift Right arithmetic Long by variable: bit 5 of the count selects a 32-bit pre-move with sign fill, then SHRD/SAR finish (see format)
9434instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9435  match(Set dst (RShiftL dst shift));  // shift count uses the eCXRegI operand class (presumably pinned to ECX/CL)
9436  effect(KILL cr);  // the flags register operand is declared clobbered
9437  ins_cost(600);
9438  size(18);  // declared size; one more than the logical variants, which use XOR where this uses SAR ...,31
9439  format %{ "TEST   $shift,32\n\t"
9440            "JEQ,s  small\n\t"
9441            "MOV    $dst.lo,$dst.hi\n\t"
9442            "SAR    $dst.hi,31\n"
9443    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9444            "SAR    $dst.hi,$shift" %}
9445  ins_encode( shift_right_arith_long( dst, shift ) );
9446  ins_pipe( pipe_slow );
9447%}
9448
9449
9450//----------Double Instructions------------------------------------------------
9451// Double Math
9452
9453// Compare & branch
9454
9455// P6 version of double compare (CmpD on x87 registers), sets condition codes in EFLAGS
9456instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9457  predicate(VM_Version::supports_cmov() && UseSSE <=1);  // needs a CMOV-capable CPU and doubles kept on the x87 stack
9458  match(Set cr (CmpD src1 src2));
9459  effect(KILL rax);  // the NaN fixup shown in the format writes AH
9460  ins_cost(150);  // cheaper than the FNSTSW-based cmpDPR_cc (cost 200)
9461  format %{ "FLD    $src1\n\t"
9462            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9463            "JNP    exit\n\t"
9464            "MOV    ah,1       // saw a NaN, set CF\n\t"
9465            "SAHF\n"
9466     "exit:\tNOP               // avoid branch to branch" %}
9467  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9468  ins_encode( Push_Reg_DPR(src1),
9469              OpcP, RegOpc(src2),
9470              cmpF_P6_fixup );
9471  ins_pipe( pipe_slow );
9472%}
9473
9474instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9475  predicate(VM_Version::supports_cmov() && UseSSE <=1);  // P6 x87 double compare into eFlagsRegUCF; unlike cmpDPR_cc_P6 no NaN fixup is encoded
9476  match(Set cr (CmpD src1 src2));
9477  ins_cost(150);
9478  format %{ "FLD    $src1\n\t"
9479            "FUCOMIP ST,$src2  // P6 instruction" %}
9480  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9481  ins_encode( Push_Reg_DPR(src1),
9482              OpcP, RegOpc(src2));
9483  ins_pipe( pipe_slow );
9484%}
9485
9486// Compare & branch: generic x87 double compare (no CMOV requirement); FPU status is copied to EFLAGS through AX
9487instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9488  predicate(UseSSE<=1);  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9489  match(Set cr (CmpD src1 src2));
9490  effect(KILL rax);  // FNSTSW AX / SAHF in the format use AX
9491  ins_cost(200);
9492  format %{ "FLD    $src1\n\t"
9493            "FCOMp  $src2\n\t"
9494            "FNSTSW AX\n\t"
9495            "TEST   AX,0x400\n\t"
9496            "JZ,s   flags\n\t"
9497            "MOV    AH,1\t# unordered treat as LT\n"
9498    "flags:\tSAHF" %}
9499  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9500  ins_encode( Push_Reg_DPR(src1),
9501              OpcP, RegOpc(src2),
9502              fpu_flags);
9503  ins_pipe( pipe_slow );
9504%}
9505
9506// Compare vs zero into -1,0,1: three-way x87 double compare (CmpD3) against the constant 0.0, integer result in dst
9507instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9508  predicate(UseSSE<=1);  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9509  match(Set dst (CmpD3 src1 zero));
9510  effect(KILL cr, KILL rax);  // both the flags and the EAX operand are declared clobbered
9511  ins_cost(280);
9512  format %{ "FTSTD  $dst,$src1" %}
9513  opcode(0xE4, 0xD9);  // emitted as OpcS then OpcP, i.e. bytes D9 E4 (x87 FTST)
9514  ins_encode( Push_Reg_DPR(src1),
9515              OpcS, OpcP, PopFPU,
9516              CmpF_Result(dst));
9517  ins_pipe( pipe_slow );
9518%}
9519
9520// Compare into -1,0,1: three-way x87 double compare (CmpD3) of two registers, integer result in dst
9521instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9522  predicate(UseSSE<=1);  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9523  match(Set dst (CmpD3 src1 src2));
9524  effect(KILL cr, KILL rax);  // both the flags and the EAX operand are declared clobbered
9525  ins_cost(300);
9526  format %{ "FCMPD  $dst,$src1,$src2" %}
9527  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9528  ins_encode( Push_Reg_DPR(src1),
9529              OpcP, RegOpc(src2),
9530              CmpF_Result(dst));
9531  ins_pipe( pipe_slow );
9532%}
9533
9534// double compare (UCOMISD) and set condition codes in EFLAGS by XMM regs, with a fixup when the result is unordered (NaN)
9535instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9536  predicate(UseSSE>=2);  // doubles live in XMM registers from SSE2 on
9537  match(Set cr (CmpD src1 src2));
9538  ins_cost(145);  // dearer than cmpD_ccCF (100), which emits no fixup
9539  format %{ "UCOMISD $src1,$src2\n\t"
9540            "JNP,s   exit\n\t"
9541            "PUSHF\t# saw NaN, set CF\n\t"
9542            "AND     [rsp], #0xffffff2b\n\t"
9543            "POPF\n"
9544    "exit:" %}
9545  ins_encode %{
9546    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9547    emit_cmpfp_fixup(_masm);  // helper defined elsewhere; presumably emits the JNP/PUSHF/AND/POPF sequence shown in the format
9548  %}
9549  ins_pipe( pipe_slow );
9550%}
9551
9552instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9553  predicate(UseSSE>=2);  // XMM double compare into eFlagsRegUCF: a bare UCOMISD, no NaN fixup emitted
9554  match(Set cr (CmpD src1 src2));
9555  ins_cost(100);
9556  format %{ "UCOMISD $src1,$src2" %}
9557  ins_encode %{
9558    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9559  %}
9560  ins_pipe( pipe_slow );
9561%}
9562
9563// double compare (UCOMISD) of an XMM reg against a double in memory, setting EFLAGS, with a fixup when unordered (NaN)
9564instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9565  predicate(UseSSE>=2);  // doubles live in XMM registers from SSE2 on
9566  match(Set cr (CmpD src1 (LoadD src2)));  // the LoadD is folded into the compare's memory operand
9567  ins_cost(145);
9568  format %{ "UCOMISD $src1,$src2\n\t"
9569            "JNP,s   exit\n\t"
9570            "PUSHF\t# saw NaN, set CF\n\t"
9571            "AND     [rsp], #0xffffff2b\n\t"
9572            "POPF\n"
9573    "exit:" %}
9574  ins_encode %{
9575    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9576    emit_cmpfp_fixup(_masm);  // helper defined elsewhere; presumably emits the JNP/PUSHF/AND/POPF sequence shown in the format
9577  %}
9578  ins_pipe( pipe_slow );
9579%}
9580
9581instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9582  predicate(UseSSE>=2);  // XMM-vs-memory double compare into eFlagsRegUCF: a bare UCOMISD, no NaN fixup emitted
9583  match(Set cr (CmpD src1 (LoadD src2)));  // the LoadD is folded into the compare's memory operand
9584  ins_cost(100);
9585  format %{ "UCOMISD $src1,$src2" %}
9586  ins_encode %{
9587    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9588  %}
9589  ins_pipe( pipe_slow );
9590%}
9591
9592// Compare into -1,0,1 in XMM: three-way double compare (CmpD3); per the format, unordered and below both yield -1
9593instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9594  predicate(UseSSE>=2);  // doubles live in XMM registers from SSE2 on
9595  match(Set dst (CmpD3 src1 src2));
9596  effect(KILL cr);  // the flags register operand is declared clobbered
9597  ins_cost(255);
9598  format %{ "UCOMISD $src1, $src2\n\t"
9599            "MOV     $dst, #-1\n\t"
9600            "JP,s    done\n\t"
9601            "JB,s    done\n\t"
9602            "SETNE   $dst\n\t"
9603            "MOVZB   $dst, $dst\n"
9604    "done:" %}
9605  ins_encode %{
9606    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9607    emit_cmpfp3(_masm, $dst$$Register);  // helper defined elsewhere; presumably emits the MOV/JP/JB/SETNE/MOVZB tail shown in the format
9608  %}
9609  ins_pipe( pipe_slow );
9610%}
9611
9612// Compare into -1,0,1 in XMM and memory: three-way double compare (CmpD3) of an XMM reg against a double loaded from memory
9613instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9614  predicate(UseSSE>=2);  // doubles live in XMM registers from SSE2 on
9615  match(Set dst (CmpD3 src1 (LoadD src2)));  // the LoadD is folded into the compare's memory operand
9616  effect(KILL cr);  // the flags register operand is declared clobbered
9617  ins_cost(275);
9618  format %{ "UCOMISD $src1, $src2\n\t"
9619            "MOV     $dst, #-1\n\t"
9620            "JP,s    done\n\t"
9621            "JB,s    done\n\t"
9622            "SETNE   $dst\n\t"
9623            "MOVZB   $dst, $dst\n"
9624    "done:" %}
9625  ins_encode %{
9626    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9627    emit_cmpfp3(_masm, $dst$$Register);  // helper defined elsewhere; presumably emits the MOV/JP/JB/SETNE/MOVZB tail shown in the format
9628  %}
9629  ins_pipe( pipe_slow );
9630%}
9631
9632
9633instruct subDPR_reg(regDPR dst, regDPR src) %{
9634  predicate (UseSSE <=1);  // x87 double subtract, two-address form: dst = dst - src
9635  match(Set dst (SubD dst src));
9636
9637  format %{ "FLD    $src\n\t"
9638            "DSUBp  $dst,ST" %}
9639  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9640  ins_cost(150);
9641  ins_encode( Push_Reg_DPR(src),
9642              OpcP, RegOpc(dst) );
9643  ins_pipe( fpu_reg_reg );
9644%}
9645
9646instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9647  predicate (UseSSE <=1);  // x87 double subtract whose result is stored to a stack slot as a double (the "D-round" in the format)
9648  match(Set dst (RoundDouble (SubD src1 src2)));  // the RoundDouble node is absorbed by the store to memory
9649  ins_cost(250);
9650
9651  format %{ "FLD    $src2\n\t"
9652            "DSUB   ST,$src1\n\t"
9653            "FSTP_D $dst\t# D-round" %}
9654  opcode(0xD8, 0x5);  // D8 /5 (x87 FSUBR ST,ST(i) per the Intel opcode map)
9655  ins_encode( Push_Reg_DPR(src2),
9656              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9657  ins_pipe( fpu_mem_reg_reg );
9658%}
9659
9660
9661instruct subDPR_reg_mem(regDPR dst, memory src) %{
9662  predicate (UseSSE <=1);  // x87 double subtract with the subtrahend loaded from memory: dst = dst - [src]
9663  match(Set dst (SubD dst (LoadD src)));  // the LoadD is folded in as an FLD from memory
9664  ins_cost(150);
9665
9666  format %{ "FLD    $src\n\t"
9667            "DSUBp  $dst,ST" %}
9668  opcode(0xDE, 0x5, 0xDD); /* DE E8+i or DE /5 */  /* LoadD  DD /0 */
9669  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9670              OpcP, RegOpc(dst) );
9671  ins_pipe( fpu_reg_mem );
9672%}
9673
9674instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9675  predicate (UseSSE<=1);  // x87 double absolute value; both operands use the regDPR1 class, so the operation is done in place
9676  match(Set dst (AbsD src));
9677  ins_cost(100);
9678  format %{ "FABS" %}
9679  opcode(0xE1, 0xD9);  // emitted as OpcS then OpcP, i.e. bytes D9 E1 (FABS)
9680  ins_encode( OpcS, OpcP );
9681  ins_pipe( fpu_reg_reg );
9682%}
9683
9684instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9685  predicate(UseSSE<=1);  // x87 double negate; both operands use the regDPR1 class, so the operation is done in place
9686  match(Set dst (NegD src));
9687  ins_cost(100);
9688  format %{ "FCHS" %}
9689  opcode(0xE0, 0xD9);  // emitted as OpcS then OpcP, i.e. bytes D9 E0 (FCHS)
9690  ins_encode( OpcS, OpcP );
9691  ins_pipe( fpu_reg_reg );
9692%}
9693
9694instruct addDPR_reg(regDPR dst, regDPR src) %{
9695  predicate(UseSSE<=1);  // x87 double add, two-address form: dst = dst + src
9696  match(Set dst (AddD dst src));
9697  format %{ "FLD    $src\n\t"
9698            "DADD   $dst,ST" %}
9699  size(4);  // declared size of the emitted sequence
9700  ins_cost(150);
9701  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9702  ins_encode( Push_Reg_DPR(src),
9703              OpcP, RegOpc(dst) );
9704  ins_pipe( fpu_reg_reg );
9705%}
9706
9707
9708instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9709  predicate(UseSSE<=1);  // x87 double add whose result is stored to a stack slot as a double (the "D-round" in the format)
9710  match(Set dst (RoundDouble (AddD src1 src2)));  // the RoundDouble node is absorbed by the store to memory
9711  ins_cost(250);
9712
9713  format %{ "FLD    $src2\n\t"
9714            "DADD   ST,$src1\n\t"
9715            "FSTP_D $dst\t# D-round" %}
9716  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9717  ins_encode( Push_Reg_DPR(src2),
9718              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9719  ins_pipe( fpu_mem_reg_reg );
9720%}
9721
9722
9723instruct addDPR_reg_mem(regDPR dst, memory src) %{
9724  predicate(UseSSE<=1);  // x87 double add with one addend loaded from memory: dst = dst + [src]
9725  match(Set dst (AddD dst (LoadD src)));  // the LoadD is folded in as an FLD from memory
9726  ins_cost(150);
9727
9728  format %{ "FLD    $src\n\t"
9729            "DADDp  $dst,ST" %}
9730  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9731  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9732              OpcP, RegOpc(dst) );
9733  ins_pipe( fpu_reg_mem );
9734%}
9735
9736// add-to-memory: load the double at dst, add src on the x87 stack, store the sum back to the same location
9737instruct addDPR_mem_reg(memory dst, regDPR src) %{
9738  predicate(UseSSE<=1);  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9739  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));  // load, add, round and store on one address collapse into this rule
9740  ins_cost(150);
9741
9742  format %{ "FLD_D  $dst\n\t"
9743            "DADD   ST,$src\n\t"
9744            "FST_D  $dst" %}
9745  opcode(0xDD, 0x0);  // NOTE(review): the encoding below spells its opcode bytes out explicitly; this clause looks unused -- confirm
9746  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9747              Opcode(0xD8), RegOpc(src),
9748              set_instruction_start,
9749              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9750  ins_pipe( fpu_reg_mem );
9751%}
9752
9753instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9754  predicate(UseSSE<=1);  // x87 double add of the constant 1.0: uses FLD1 instead of a constant-table load
9755  match(Set dst (AddD dst con));
9756  ins_cost(125);  // cheaper than the general-constant rule addDPR_reg_imm (cost 200)
9757  format %{ "FLD1\n\t"
9758            "DADDp  $dst,ST" %}
9759  ins_encode %{
9760    __ fld1();  // push +1.0 onto the x87 stack
9761    __ faddp($dst$$reg);  // add it into dst and pop
9762  %}
9763  ins_pipe(fpu_reg);
9764%}
9765
9766instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9767  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );  // x87 add of a double constant other than 0.0 and 1.0 (_kids[1] is the constant operand)
9768  match(Set dst (AddD dst con));
9769  ins_cost(200);
9770  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9771            "DADDp  $dst,ST" %}
9772  ins_encode %{
9773    __ fld_d($constantaddress($con));  // push the constant from the constant table
9774    __ faddp($dst$$reg);  // add it into dst and pop
9775  %}
9776  ins_pipe(fpu_reg_mem);
9777%}
9778
9779instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9780  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );  // constant (second child of the AddD under RoundDouble) must be neither 0.0 nor 1.0
9781  match(Set dst (RoundDouble (AddD src con)));  // the RoundDouble node is absorbed by the store to the stack slot
9782  ins_cost(200);
9783  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9784            "DADD   ST,$src\n\t"
9785            "FSTP_D $dst\t# D-round" %}
9786  ins_encode %{
9787    __ fld_d($constantaddress($con));  // push the constant from the constant table
9788    __ fadd($src$$reg);  // ST += src
9789    __ fstp_d(Address(rsp, $dst$$disp));  // store as a double to the stack slot and pop
9790  %}
9791  ins_pipe(fpu_mem_reg_con);
9792%}
9793
9794instruct mulDPR_reg(regDPR dst, regDPR src) %{
9795  predicate(UseSSE<=1);  // x87 double multiply, two-address form: dst = dst * src
9796  match(Set dst (MulD dst src));
9797  format %{ "FLD    $src\n\t"
9798            "DMULp  $dst,ST" %}
9799  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9800  ins_cost(150);
9801  ins_encode( Push_Reg_DPR(src),
9802              OpcP, RegOpc(dst) );
9803  ins_pipe( fpu_reg_reg );
9804%}
9805
9806// Strict FP instruction biases argument before multiply then
9807// biases result to avoid double rounding of subnormals.
9808//
9809// scale arg1 by multiplying arg1 by 2^(-15360)
9810// load arg2
9811// multiply scaled arg1 by arg2
9812// rescale product by 2^(15360)
9813//
9814instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9815  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );  // only while compiling a method flagged strict
9816  match(Set dst (MulD dst src));  // same ideal pattern as mulDPR_reg
9817  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9818
9819  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9820            "DMULp  $dst,ST\n\t"
9821            "FLD    $src\n\t"
9822            "DMULp  $dst,ST\n\t"
9823            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9824            "DMULp  $dst,ST\n\t" %}
9825  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9826  ins_encode( strictfp_bias1(dst),
9827              Push_Reg_DPR(src),
9828              OpcP, RegOpc(dst),
9829              strictfp_bias2(dst) );
9830  ins_pipe( fpu_reg_reg );
9831%}
9832
9833instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9834  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );  // x87 multiply by a double constant other than 0.0 and 1.0 (_kids[1] is the constant operand)
9835  match(Set dst (MulD dst con));
9836  ins_cost(200);
9837  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9838            "DMULp  $dst,ST" %}
9839  ins_encode %{
9840    __ fld_d($constantaddress($con));  // push the constant from the constant table
9841    __ fmulp($dst$$reg);  // multiply it into dst and pop
9842  %}
9843  ins_pipe(fpu_reg_mem);
9844%}
9845
9846
9847instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9848  predicate( UseSSE<=1 );  // x87 double multiply with one factor loaded from memory: dst = dst * [src]
9849  match(Set dst (MulD dst (LoadD src)));  // the LoadD is folded in as an FLD from memory
9850  ins_cost(200);
9851  format %{ "FLD_D  $src\n\t"
9852            "DMULp  $dst,ST" %}
9853  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9854  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9855              OpcP, RegOpc(dst) );
9856  ins_pipe( fpu_reg_mem );
9857%}
9858
9859//
9860// Cisc-alternate to reg-reg multiply: three-address form, dst = src * (double loaded from mem)
9861instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9862  predicate( UseSSE<=1 );  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9863  match(Set dst (MulD src (LoadD mem)));  // the LoadD is folded in as an FLD from memory
9864  ins_cost(250);
9865  format %{ "FLD_D  $mem\n\t"
9866            "DMUL   ST,$src\n\t"
9867            "FSTP_D $dst" %}
9868  opcode(0xD8, 0x1, 0xDD); /* D8 C8+i */  /* LoadD DD /0 (FLD m64fp); D9 /0 would load only a 32-bit float */
9869  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9870              OpcReg_FPR(src),
9871              Pop_Reg_DPR(dst) );
9872  ins_pipe( fpu_reg_reg_mem );
9873%}
9874
9875
9876// MACRO3 -- addDPR a mulDPR
9877// This instruction is a '2-address' instruction in that the result goes
9878// back to src2.  This eliminates a move from the macro; possibly the
9879// register allocator will have to add it back (and maybe not).
9880instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9881  predicate( UseSSE<=1 );  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9882  match(Set src2 (AddD (MulD src0 src1) src2));  // fused pattern: src2 = src0*src1 + src2
9883  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9884            "DMUL   ST,$src1\n\t"
9885            "DADDp  $src2,ST" %}
9886  ins_cost(250);
9887  opcode(0xDD); /* LoadD DD /0 */  // NOTE(review): the encoding below names no OpcP/OpcS; this clause looks unused -- confirm
9888  ins_encode( Push_Reg_FPR(src0),
9889              FMul_ST_reg(src1),
9890              FAddP_reg_ST(src2) );
9891  ins_pipe( fpu_reg_reg_reg );
9892%}
9893
9894
9895// MACRO3 -- subDPR a mulDPR: '2-address' fused pattern, src2 = src0*src1 - src2
9896instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9897  predicate( UseSSE<=1 );  // doubles are kept on the x87 stack when UseSSE is 0 or 1
9898  match(Set src2 (SubD (MulD src0 src1) src2));  // the product is the minuend, hence the reversed subtract in the format
9899  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9900            "DMUL   ST,$src1\n\t"
9901            "DSUBRp $src2,ST" %}
9902  ins_cost(250);
9903  ins_encode( Push_Reg_FPR(src0),
9904              FMul_ST_reg(src1),
9905              Opcode(0xDE), Opc_plus(0xE0,src2));
9906  ins_pipe( fpu_reg_reg_reg );
9907%}
9908
9909
9910instruct divDPR_reg(regDPR dst, regDPR src) %{
9911  predicate( UseSSE<=1 );  // x87 double divide, two-address form: dst = dst / src
9912  match(Set dst (DivD dst src));
9913
9914  format %{ "FLD    $src\n\t"
9915            "FDIVp  $dst,ST" %}
9916  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9917  ins_cost(150);
9918  ins_encode( Push_Reg_DPR(src),
9919              OpcP, RegOpc(dst) );
9920  ins_pipe( fpu_reg_reg );
9921%}
9922
9923// Strict FP instruction biases argument before division then
9924// biases result, to avoid double rounding of subnormals.
9925//
9926// scale dividend by multiplying dividend by 2^(-15360)
9927// load divisor
9928// divide scaled dividend by divisor
9929// rescale quotient by 2^(15360)
9930//
9931instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9932  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9933  match(Set dst (DivD dst src));
9934  // Exactly one predicate clause: the strict-method test above already includes UseSSE<=1.
9935  ins_cost(1);   // Select this instruction for all strict FP double divides
9936
9937  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9938            "DMULp  $dst,ST\n\t"
9939            "FLD    $src\n\t"
9940            "FDIVp  $dst,ST\n\t"
9941            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9942            "DMULp  $dst,ST\n\t" %}
9943  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9944  ins_encode( strictfp_bias1(dst),
9945              Push_Reg_DPR(src),
9946              OpcP, RegOpc(dst),
9947              strictfp_bias2(dst) );
9948  ins_pipe( fpu_reg_reg );
9949%}
9950
9951instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9952  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );  // x87 divide with result stored to a stack slot; excluded while compiling strict methods
9953  match(Set dst (RoundDouble (DivD src1 src2)));  // the RoundDouble node is absorbed by the store to memory
9954
9955  format %{ "FLD    $src1\n\t"
9956            "FDIV   ST,$src2\n\t"
9957            "FSTP_D $dst\t# D-round" %}
9958  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9959  ins_encode( Push_Reg_DPR(src1),
9960              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9961  ins_pipe( fpu_mem_reg_reg );
9962%}
9963
9964
9965instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9966  predicate(UseSSE<=1);  // x87 double remainder (ModD), two-address form: dst = dst mod src
9967  match(Set dst (ModD dst src));
9968  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9969
9970  format %{ "DMOD   $dst,$src" %}
9971  ins_cost(250);
9972  ins_encode(Push_Reg_Mod_DPR(dst, src),
9973              emitModDPR(),
9974              Push_Result_Mod_DPR(src),
9975              Pop_Reg_DPR(dst));
9976  ins_pipe( pipe_slow );
9977%}
9978
9979instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9980  predicate(UseSSE>=2);  // double remainder for XMM operands: per the format, values go through the stack to the x87 FPREM loop and back
9981  match(Set dst (ModD src0 src1));
9982  effect(KILL rax, KILL cr);  // the FNSTSW AX / SAHF loop in the format uses EAX and EFLAGS
9983
9984  format %{ "SUB    ESP,8\t # DMOD\n"
9985          "\tMOVSD  [ESP+0],$src1\n"
9986          "\tFLD_D  [ESP+0]\n"
9987          "\tMOVSD  [ESP+0],$src0\n"
9988          "\tFLD_D  [ESP+0]\n"
9989     "loop:\tFPREM\n"
9990          "\tFWAIT\n"
9991          "\tFNSTSW AX\n"
9992          "\tSAHF\n"
9993          "\tJP     loop\n"
9994          "\tFSTP_D [ESP+0]\n"
9995          "\tMOVSD  $dst,[ESP+0]\n"
9996          "\tADD    ESP,8\n"
9997          "\tFSTP   ST0\t # Restore FPU Stack"
9998    %}
9999  ins_cost(250);
10000  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
10001  ins_pipe( pipe_slow );
10002%}
10003
10004instruct atanDPR_reg(regDPR dst, regDPR src) %{
10005  predicate (UseSSE<=1);  // x87 two-operand arctangent (AtanD dst src), two-address form with the result in dst
10006  match(Set dst(AtanD dst src));
10007  format %{ "DATA   $dst,$src" %}
10008  opcode(0xD9, 0xF3);  // emitted as OpcP then OpcS, i.e. bytes D9 F3 (x87 FPATAN)
10009  ins_encode( Push_Reg_DPR(src),
10010              OpcP, OpcS, RegOpc(dst) );
10011  ins_pipe( pipe_slow );
10012%}
10013
10014instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10015  predicate (UseSSE>=2);  // two-operand arctangent for XMM operands; the x87 opcode below is still used for the computation
10016  match(Set dst(AtanD dst src));
10017  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10018  format %{ "DATA   $dst,$src" %}
10019  opcode(0xD9, 0xF3);  // emitted as OpcP then OpcS, i.e. bytes D9 F3 (x87 FPATAN)
10020  ins_encode( Push_SrcD(src),
10021              OpcP, OpcS, Push_ResultD(dst) );
10022  ins_pipe( pipe_slow );
10023%}
10024
10025instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10026  predicate (UseSSE<=1);  // x87 double square root: push src, take the root, pop the result into dst
10027  match(Set dst (SqrtD src));
10028  format %{ "DSQRT  $dst,$src" %}
10029  opcode(0xFA, 0xD9);  // emitted as OpcS then OpcP, i.e. bytes D9 FA (x87 FSQRT)
10030  ins_encode( Push_Reg_DPR(src),
10031              OpcS, OpcP, Pop_Reg_DPR(dst) );
10032  ins_pipe( pipe_slow );
10033%}
10034
10035//-------------Float Instructions-------------------------------
10036// Float Math
10037
10038// Code for float compare:
10039//     fcompp();
10040//     fwait(); fnstsw_ax();
10041//     sahf();
10042//     movl(dst, unordered_result);
10043//     jcc(Assembler::parity, exit);
10044//     movl(dst, less_result);
10045//     jcc(Assembler::below, exit);
10046//     movl(dst, equal_result);
10047//     jcc(Assembler::equal, exit);
10048//     movl(dst, greater_result);
10049//   exit:
10050
10051// P6 version of float compare (CmpF on x87 registers), sets condition codes in EFLAGS
10052instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10053  predicate(VM_Version::supports_cmov() && UseSSE == 0);  // needs a CMOV-capable CPU; floats are on the x87 stack only when UseSSE is 0
10054  match(Set cr (CmpF src1 src2));
10055  effect(KILL rax);  // the NaN fixup shown in the format writes AH
10056  ins_cost(150);  // cheaper than the FNSTSW-based cmpFPR_cc (cost 200)
10057  format %{ "FLD    $src1\n\t"
10058            "FUCOMIP ST,$src2  // P6 instruction\n\t"
10059            "JNP    exit\n\t"
10060            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10061            "SAHF\n"
10062     "exit:\tNOP               // avoid branch to branch" %}
10063  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10064  ins_encode( Push_Reg_DPR(src1),
10065              OpcP, RegOpc(src2),
10066              cmpF_P6_fixup );
10067  ins_pipe( pipe_slow );
10068%}
10069
10070instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10071  predicate(VM_Version::supports_cmov() && UseSSE == 0);  // P6 x87 float compare into eFlagsRegUCF; unlike cmpFPR_cc_P6 no NaN fixup is encoded
10072  match(Set cr (CmpF src1 src2));
10073  ins_cost(100);
10074  format %{ "FLD    $src1\n\t"
10075            "FUCOMIP ST,$src2  // P6 instruction" %}
10076  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10077  ins_encode( Push_Reg_DPR(src1),
10078              OpcP, RegOpc(src2));
10079  ins_pipe( pipe_slow );
10080%}
10081
10082
10083// Compare & branch: generic x87 float compare (no CMOV requirement); FPU status is copied to EFLAGS through AX
10084instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10085  predicate(UseSSE == 0);  // floats are on the x87 stack only when UseSSE is 0
10086  match(Set cr (CmpF src1 src2));
10087  effect(KILL rax);  // FNSTSW AX / SAHF in the format use AX
10088  ins_cost(200);
10089  format %{ "FLD    $src1\n\t"
10090            "FCOMp  $src2\n\t"
10091            "FNSTSW AX\n\t"
10092            "TEST   AX,0x400\n\t"
10093            "JZ,s   flags\n\t"
10094            "MOV    AH,1\t# unordered treat as LT\n"
10095    "flags:\tSAHF" %}
10096  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10097  ins_encode( Push_Reg_DPR(src1),
10098              OpcP, RegOpc(src2),
10099              fpu_flags);
10100  ins_pipe( pipe_slow );
10101%}
10102
10103// Compare vs zero into -1,0,1: three-way x87 float compare (CmpF3) against the constant 0.0, integer result in dst
10104instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10105  predicate(UseSSE == 0);  // floats are on the x87 stack only when UseSSE is 0
10106  match(Set dst (CmpF3 src1 zero));
10107  effect(KILL cr, KILL rax);  // both the flags and the EAX operand are declared clobbered
10108  ins_cost(280);
10109  format %{ "FTSTF  $dst,$src1" %}
10110  opcode(0xE4, 0xD9);  // emitted as OpcS then OpcP, i.e. bytes D9 E4 (x87 FTST)
10111  ins_encode( Push_Reg_DPR(src1),
10112              OpcS, OpcP, PopFPU,
10113              CmpF_Result(dst));
10114  ins_pipe( pipe_slow );
10115%}
10116
10117// Compare into -1,0,1: three-way x87 float compare (CmpF3) of two registers, integer result in dst
10118instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10119  predicate(UseSSE == 0);  // floats are on the x87 stack only when UseSSE is 0
10120  match(Set dst (CmpF3 src1 src2));
10121  effect(KILL cr, KILL rax);  // both the flags and the EAX operand are declared clobbered
10122  ins_cost(300);
10123  format %{ "FCMPF  $dst,$src1,$src2" %}
10124  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10125  ins_encode( Push_Reg_DPR(src1),
10126              OpcP, RegOpc(src2),
10127              CmpF_Result(dst));
10128  ins_pipe( pipe_slow );
10129%}
10130
10131// float compare (UCOMISS) and set condition codes in EFLAGS by XMM regs, with a fixup when the result is unordered (NaN)
10132instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10133  predicate(UseSSE>=1);  // floats live in XMM registers from SSE1 on
10134  match(Set cr (CmpF src1 src2));
10135  ins_cost(145);  // dearer than cmpF_ccCF (100), which emits no fixup
10136  format %{ "UCOMISS $src1,$src2\n\t"
10137            "JNP,s   exit\n\t"
10138            "PUSHF\t# saw NaN, set CF\n\t"
10139            "AND     [rsp], #0xffffff2b\n\t"
10140            "POPF\n"
10141    "exit:" %}
10142  ins_encode %{
10143    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10144    emit_cmpfp_fixup(_masm);  // helper defined elsewhere; presumably emits the JNP/PUSHF/AND/POPF sequence shown in the format
10145  %}
10146  ins_pipe( pipe_slow );
10147%}
10148
// Cheaper variant of cmpF_cc: same match rule, but the result is of class
// eFlagsRegUCF and only the bare UCOMISS is emitted (no NaN fixup), hence the
// lower cost (100 vs 145).
// NOTE(review): presumably chosen when the flags consumer only needs what
// UCOMISS sets directly -- confirm against the eFlagsRegUCF operand definition.
10149instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10150  predicate(UseSSE>=1);
10151  match(Set cr (CmpF src1 src2));
10152  ins_cost(100);
10153  format %{ "UCOMISS $src1,$src2" %}
10154  ins_encode %{
10155    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10156  %}
10157  ins_pipe( pipe_slow );
10158%}
10159
10160// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand form of cmpF_cc: the second input is a LoadF folded into the
// UCOMISS as an address operand.  The same NaN fixup sequence follows.
10161instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10162  predicate(UseSSE>=1);
10163  match(Set cr (CmpF src1 (LoadF src2)));
10164  ins_cost(165);
10165  format %{ "UCOMISS $src1,$src2\n\t"
10166            "JNP,s   exit\n\t"
10167            "PUSHF\t# saw NaN, set CF\n\t"
10168            "AND     [rsp], #0xffffff2b\n\t"
10169            "POPF\n"
10170    "exit:" %}
10171  ins_encode %{
10172    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    // Helper defined elsewhere in the file; the format above describes what it emits.
10173    emit_cmpfp_fixup(_masm);
10174  %}
10175  ins_pipe( pipe_slow );
10176%}
10177
// Memory-operand form of cmpF_ccCF: folds the LoadF into UCOMISS and emits no
// NaN fixup; result is of class eFlagsRegUCF.
10178instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10179  predicate(UseSSE>=1);
10180  match(Set cr (CmpF src1 (LoadF src2)));
10181  ins_cost(100);
10182  format %{ "UCOMISS $src1,$src2" %}
10183  ins_encode %{
10184    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10185  %}
10186  ins_pipe( pipe_slow );
10187%}
10188
10189// Compare into -1,0,1 in XMM
// SSE rule (UseSSE >= 1) for CmpF3 of two XMM registers.  Per the format text,
// $dst is preset to -1 and kept for the parity (unordered) and below cases;
// otherwise SETNE/MOVZB leaves 0 for equal and 1 for greater.  Flags are killed.
10190instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10191  predicate(UseSSE>=1);
10192  match(Set dst (CmpF3 src1 src2));
10193  effect(KILL cr);
10194  ins_cost(255);
10195  format %{ "UCOMISS $src1, $src2\n\t"
10196            "MOV     $dst, #-1\n\t"
10197            "JP,s    done\n\t"
10198            "JB,s    done\n\t"
10199            "SETNE   $dst\n\t"
10200            "MOVZB   $dst, $dst\n"
10201    "done:" %}
10202  ins_encode %{
10203    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
    // Helper defined elsewhere in the file; turns the flags into the int result.
10204    emit_cmpfp3(_masm, $dst$$Register);
10205  %}
10206  ins_pipe( pipe_slow );
10207%}
10208
10209// Compare into -1,0,1 in XMM and memory
// Memory-operand form of cmpF_reg: the second input is a LoadF folded into the
// UCOMISS address operand.  Result materialisation is the same sequence.
10210instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10211  predicate(UseSSE>=1);
10212  match(Set dst (CmpF3 src1 (LoadF src2)));
10213  effect(KILL cr);
10214  ins_cost(275);
10215  format %{ "UCOMISS $src1, $src2\n\t"
10216            "MOV     $dst, #-1\n\t"
10217            "JP,s    done\n\t"
10218            "JB,s    done\n\t"
10219            "SETNE   $dst\n\t"
10220            "MOVZB   $dst, $dst\n"
10221    "done:" %}
10222  ins_encode %{
10223    __ ucomiss($src1$$XMMRegister, $src2$$Address);
    // Helper defined elsewhere in the file; turns the flags into the int result.
10224    emit_cmpfp3(_masm, $dst$$Register);
10225  %}
10226  ins_pipe( pipe_slow );
10227%}
10228
10229// Spill to obtain 24-bit precision
// x87 float subtract used when the compilation selects 24-bit instructions
// (UseSSE == 0).  The result goes to a float stack slot rather than a register:
// push $src1, subtract $src2, then pop the result to memory at $dst.
10230instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10231  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10232  match(Set dst (SubF src1 src2));
10233
10234  format %{ "FSUB   $dst,$src1 - $src2" %}
10235  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10236  ins_encode( Push_Reg_FPR(src1),
10237              OpcReg_FPR(src2),
10238              Pop_Mem_FPR(dst) );
10239  ins_pipe( fpu_mem_reg_reg );
10240%}
10241//
10242// This instruction does not round to 24-bits
// Two-address x87 float subtract (dst = dst - src), used when UseSSE == 0 and
// the compilation does not select 24-bit instructions.  $src is pushed and the
// DE-opcode form addressed at $dst is emitted, leaving the result in $dst.
10243instruct subFPR_reg(regFPR dst, regFPR src) %{
10244  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10245  match(Set dst (SubF dst src));
10246
10247  format %{ "FSUB   $dst,$src" %}
10248  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10249  ins_encode( Push_Reg_FPR(src),
10250              OpcP, RegOpc(dst) );
10251  ins_pipe( fpu_reg_reg );
10252%}
10253
10254// Spill to obtain 24-bit precision
// x87 float add for 24-bit mode (UseSSE == 0): push $src2, add $src1, then pop
// the sum to the float stack slot $dst.
10255instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10256  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10257  match(Set dst (AddF src1 src2));
10258
10259  format %{ "FADD   $dst,$src1,$src2" %}
10260  opcode(0xD8, 0x0); /* D8 C0+i */
10261  ins_encode( Push_Reg_FPR(src2),
10262              OpcReg_FPR(src1),
10263              Pop_Mem_FPR(dst) );
10264  ins_pipe( fpu_mem_reg_reg );
10265%}
10266//
10267// This instruction does not round to 24-bits
// Two-address x87 float add (dst = dst + src) for UseSSE == 0 without 24-bit
// instruction selection: load $src onto the FPU stack, then add-and-pop into $dst.
10268instruct addFPR_reg(regFPR dst, regFPR src) %{
10269  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10270  match(Set dst (AddF dst src));
10271
10272  format %{ "FLD    $src\n\t"
10273            "FADDp  $dst,ST" %}
10274  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10275  ins_encode( Push_Reg_FPR(src),
10276              OpcP, RegOpc(dst) );
10277  ins_pipe( fpu_reg_reg );
10278%}
10279
// x87 float absolute value (UseSSE == 0).  Both operands are constrained to
// the regFPR1 class, so the operation works in place on that single register.
// NOTE(review): OpcS then OpcP with opcode(0xE1, 0xD9) presumably emits D9 E1
// (FABS) -- confirm against the enc_class definitions.
10280instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10281  predicate(UseSSE==0);
10282  match(Set dst (AbsF src));
10283  ins_cost(100);
10284  format %{ "FABS" %}
10285  opcode(0xE1, 0xD9);
10286  ins_encode( OpcS, OpcP );
10287  ins_pipe( fpu_reg_reg );
10288%}
10289
// x87 float negate (UseSSE == 0).  Both operands are constrained to the
// regFPR1 class, so the operation works in place on that single register.
// NOTE(review): OpcS then OpcP with opcode(0xE0, 0xD9) presumably emits D9 E0
// (FCHS) -- confirm against the enc_class definitions.
10290instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10291  predicate(UseSSE==0);
10292  match(Set dst (NegF src));
10293  ins_cost(100);
10294  format %{ "FCHS" %}
10295  opcode(0xE0, 0xD9);
10296  ins_encode( OpcS, OpcP );
10297  ins_pipe( fpu_reg_reg );
10298%}
10299
10300// Cisc-alternate to addFPR_reg
10301// Spill to obtain 24-bit precision
// 24-bit-mode float add whose second input is a LoadF folded in as a memory
// operand: load $src2 from memory (tertiary opcode D9 /0), add register $src1,
// then store-and-pop the sum to the float stack slot $dst.
10302instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10303  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10304  match(Set dst (AddF src1 (LoadF src2)));
10305
10306  format %{ "FLD    $src2\n\t"
10307            "FADD   ST,$src1\n\t"
10308            "FSTP_S $dst" %}
10309  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10310  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10311              OpcReg_FPR(src1),
10312              Pop_Mem_FPR(dst) );
10313  ins_pipe( fpu_mem_reg_mem );
10314%}
10315//
10316// Cisc-alternate to addFPR_reg
10317// This instruction does not round to 24-bits
// Two-address float add with a folded LoadF (dst = dst + [src]): load the
// memory operand via the tertiary opcode (D9 /0), then emit the DE-opcode form
// addressed at $dst.
10318instruct addFPR_reg_mem(regFPR dst, memory src) %{
10319  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10320  match(Set dst (AddF dst (LoadF src)));
10321
10322  format %{ "FADD   $dst,$src" %}
10323  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10324  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10325              OpcP, RegOpc(dst) );
10326  ins_pipe( fpu_reg_mem );
10327%}
10328
10329// Following two instructions for _222_mpegaudio
10330// Spill to obtain 24-bit precision
// 24-bit-mode float add where $src1 is declared as a memory operand and $src2
// as a register: load $src1 (tertiary opcode D9 /0), add $src2, then pop the
// sum to the float stack slot $dst.
10331instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10332  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10333  match(Set dst (AddF src1 src2));
10334
10335  format %{ "FADD   $dst,$src1,$src2" %}
10336  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10337  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10338              OpcReg_FPR(src2),
10339              Pop_Mem_FPR(dst) );
10340  ins_pipe( fpu_mem_reg_mem );
10341%}
10342
10343// Cisc-spill variant
10344// Spill to obtain 24-bit precision
// 24-bit-mode float add with both inputs in memory ($src2 via a folded LoadF):
// load $src2 (tertiary opcode D9 /0), add the memory operand $src1 using the
// primary opcode with the secondary value as the /digit, then pop to $dst.
// NOTE(review): set_instruction_start is emitted between the two memory
// instructions; its exact purpose is defined outside this chunk.
10345instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10346  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10347  match(Set dst (AddF src1 (LoadF src2)));
10348
10349  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10350  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10351  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10352              set_instruction_start,
10353              OpcP, RMopc_Mem(secondary,src1),
10354              Pop_Mem_FPR(dst) );
10355  ins_pipe( fpu_mem_mem_mem );
10356%}
10357
10358// Spill to obtain 24-bit precision
// 24-bit-mode float add with both inputs declared as memory operands: load
// $src2 (tertiary opcode D9 /0), add the memory operand $src1 (D8 /0), then
// pop the sum to the float stack slot $dst.  The encoding is the same as
// addFPR24_mem_cisc; only the match rule differs.
10359instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10360  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10361  match(Set dst (AddF src1 src2));
10362
10363  format %{ "FADD   $dst,$src1,$src2" %}
10364  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10365  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10366              set_instruction_start,
10367              OpcP, RMopc_Mem(secondary,src1),
10368              Pop_Mem_FPR(dst) );
10369  ins_pipe( fpu_mem_mem_mem );
10370%}
10371
10372
10373// Spill to obtain 24-bit precision
// 24-bit-mode float add of a register and a float constant.  The constant is
// read from the constant table and the sum is stored to the float stack slot.
10374instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10375  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10376  match(Set dst (AddF src con));
10377  format %{ "FLD    $src\n\t"
10378            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10379            "FSTP_S $dst"  %}
10380  ins_encode %{
10381    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    // Add the single-precision constant straight from the constant table.
10382    __ fadd_s($constantaddress($con));
    // Store-and-pop to the destination stack slot, addressed off rsp.
10383    __ fstp_s(Address(rsp, $dst$$disp));
10384  %}
10385  ins_pipe(fpu_mem_reg_con);
10386%}
10387//
10388// This instruction does not round to 24-bits
// Float add of a register and a float constant for UseSSE == 0 without 24-bit
// instruction selection.  Same sequence as addFPR24_reg_imm, except the result
// is popped into the register $dst instead of a stack slot.
10389instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10390  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10391  match(Set dst (AddF src con));
10392  format %{ "FLD    $src\n\t"
10393            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10394            "FSTP   $dst"  %}
10395  ins_encode %{
10396    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    // Add the single-precision constant straight from the constant table.
10397    __ fadd_s($constantaddress($con));
    // Pop the sum into the destination FPU register.
10398    __ fstp_d($dst$$reg);
10399  %}
10400  ins_pipe(fpu_reg_reg_con);
10401%}
10402
10403// Spill to obtain 24-bit precision
// x87 float multiply for 24-bit mode (UseSSE == 0): push $src1, multiply by
// $src2, then store-and-pop the product to the float stack slot $dst.
10404instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10405  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10406  match(Set dst (MulF src1 src2));
10407
10408  format %{ "FLD    $src1\n\t"
10409            "FMUL   $src2\n\t"
10410            "FSTP_S $dst"  %}
10411  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10412  ins_encode( Push_Reg_FPR(src1),
10413              OpcReg_FPR(src2),
10414              Pop_Mem_FPR(dst) );
10415  ins_pipe( fpu_mem_reg_reg );
10416%}
10417//
10418// This instruction does not round to 24-bits
// Three-operand x87 float multiply for UseSSE == 0 without 24-bit instruction
// selection: push $src2, multiply by $src1, then pop the product into the
// register $dst.  The format text mirrors that encoding order and uses the
// register-pop mnemonic (FSTP), not the memory store FSTP_S that the 24-bit
// spill variants use.
10419instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10420  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10421  match(Set dst (MulF src1 src2));
10422
10423  format %{ "FLD    $src2\n\t"
10424            "FMUL   ST,$src1\n\t"
10425            "FSTP   $dst"  %}
10426  opcode(0xD8, 0x1); /* D8 C8+i */
10427  ins_encode( Push_Reg_FPR(src2),
10428              OpcReg_FPR(src1),
10429              Pop_Reg_FPR(dst) );
10430  ins_pipe( fpu_reg_reg_reg );
10431%}
10432
10433
10434// Spill to obtain 24-bit precision
10435// Cisc-alternate to reg-reg multiply
// 24-bit-mode float multiply whose second input is a folded LoadF: load $src2
// from memory (tertiary opcode D9 /0), multiply by register $src1, then
// store-and-pop the product to the float stack slot $dst.
10436instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10437  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10438  match(Set dst (MulF src1 (LoadF src2)));
10439
10440  format %{ "FLD_S  $src2\n\t"
10441            "FMUL   $src1\n\t"
10442            "FSTP_S $dst"  %}
10443  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10444  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10445              OpcReg_FPR(src1),
10446              Pop_Mem_FPR(dst) );
10447  ins_pipe( fpu_mem_reg_mem );
10448%}
10449//
10450// This instruction does not round to 24-bits
10451// Cisc-alternate to reg-reg multiply
// Float multiply with a folded LoadF for UseSSE == 0 without 24-bit
// instruction selection: load $src2 from memory (tertiary opcode D9 /0),
// multiply by register $src1, then pop the product into register $dst.
10452instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10453  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10454  match(Set dst (MulF src1 (LoadF src2)));
10455
10456  format %{ "FMUL   $dst,$src1,$src2" %}
10457  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10458  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10459              OpcReg_FPR(src1),
10460              Pop_Reg_FPR(dst) );
10461  ins_pipe( fpu_reg_reg_mem );
10462%}
10463
10464// Spill to obtain 24-bit precision
// 24-bit-mode float multiply with both inputs declared as memory operands:
// load $src2 (tertiary opcode D9 /0), multiply by the memory operand $src1
// (D8 /1), then pop the product to the float stack slot $dst.
// NOTE(review): set_instruction_start is emitted between the two memory
// instructions; its exact purpose is defined outside this chunk.
10465instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10466  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10467  match(Set dst (MulF src1 src2));
10468
10469  format %{ "FMUL   $dst,$src1,$src2" %}
10470  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10471  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10472              set_instruction_start,
10473              OpcP, RMopc_Mem(secondary,src1),
10474              Pop_Mem_FPR(dst) );
10475  ins_pipe( fpu_mem_mem_mem );
10476%}
10477
10478// Spill to obtain 24-bit precision
// 24-bit-mode float multiply of a register by a float constant.  The constant
// is read from the constant table and the product is stored to the stack slot.
10479instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10480  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10481  match(Set dst (MulF src con));
10482
10483  format %{ "FLD    $src\n\t"
10484            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10485            "FSTP_S $dst"  %}
10486  ins_encode %{
10487    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    // Multiply by the single-precision constant from the constant table.
10488    __ fmul_s($constantaddress($con));
    // Store-and-pop to the destination stack slot, addressed off rsp.
10489    __ fstp_s(Address(rsp, $dst$$disp));
10490  %}
10491  ins_pipe(fpu_mem_reg_con);
10492%}
10493//
10494// This instruction does not round to 24-bits
// Float multiply of a register by a float constant for UseSSE == 0 without
// 24-bit instruction selection.  Same sequence as mulFPR24_reg_imm, except the
// product is popped into the register $dst instead of a stack slot.
10495instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10496  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10497  match(Set dst (MulF src con));
10498
10499  format %{ "FLD    $src\n\t"
10500            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10501            "FSTP   $dst"  %}
10502  ins_encode %{
10503    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
    // Multiply by the single-precision constant from the constant table.
10504    __ fmul_s($constantaddress($con));
    // Pop the product into the destination FPU register.
10505    __ fstp_d($dst$$reg);
10506  %}
10507  ins_pipe(fpu_reg_reg_con);
10508%}
10509
10510
10511//
10512// MACRO1 -- subsume unshared load into mulFPR
10513// This instruction does not round to 24-bits
// Matches a multiply whose LEFT input is a LoadF: load $mem1 (tertiary opcode
// D9 /0), multiply by register $src, then pop the product into $dst.
10514instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10515  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10516  match(Set dst (MulF (LoadF mem1) src));
10517
10518  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10519            "FMUL   ST,$src\n\t"
10520            "FSTP   $dst" %}
10521  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10522  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10523              OpcReg_FPR(src),
10524              Pop_Reg_FPR(dst) );
10525  ins_pipe( fpu_reg_reg_mem );
10526%}
10527//
10528// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10529// This instruction does not round to 24-bits
// Fuses (LoadF mem1) * src1 + src2 into one instruction: load $mem1 (primary
// opcode D9 /0), multiply by $src1, add $src2, then pop the result into $dst.
// Given an explicit cost of 95.
10530instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10531  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10532  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10533  ins_cost(95);
10534
10535  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10536            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10537            "FADD   ST,$src2\n\t"
10538            "FSTP   $dst" %}
10539  opcode(0xD9); /* LoadF D9 /0 */
10540  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10541              FMul_ST_reg(src1),
10542              FAdd_ST_reg(src2),
10543              Pop_Reg_FPR(dst) );
10544  ins_pipe( fpu_reg_mem_reg_reg );
10545%}
10546
10547// MACRO3 -- addFPR a mulFPR
10548// This instruction does not round to 24-bits.  It is a '2-address'
10549// instruction in that the result goes back to src2.  This eliminates
10550// a move from the macro; possibly the register allocator will have
10551// to add it back (and maybe not).
// Sequence: push $src0, multiply by $src1, then add-and-pop into $src2.
// NOTE(review): opcode(0xD9) is declared but none of the listed enc_classes
// visibly takes an opcode argument here -- confirm whether it is still needed.
10552instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10553  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10554  match(Set src2 (AddF (MulF src0 src1) src2));
10555
10556  format %{ "FLD    $src0     ===MACRO3===\n\t"
10557            "FMUL   ST,$src1\n\t"
10558            "FADDP  $src2,ST" %}
10559  opcode(0xD9); /* LoadF D9 /0 */
10560  ins_encode( Push_Reg_FPR(src0),
10561              FMul_ST_reg(src1),
10562              FAddP_reg_ST(src2) );
10563  ins_pipe( fpu_reg_reg_reg );
10564%}
10565
10566// MACRO4 -- divFPR subFPR
10567// This instruction does not round to 24-bits
// Fuses (src2 - src1) / src3 into one instruction: push $src2, apply the
// combined subtract/divide encoding with $src1 and $src3, then pop into $dst.
10568instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10569  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10570  match(Set dst (DivF (SubF src2 src1) src3));
10571
10572  format %{ "FLD    $src2   ===MACRO4===\n\t"
10573            "FSUB   ST,$src1\n\t"
10574            "FDIV   ST,$src3\n\t"
10575            "FSTP  $dst" %}
10576  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10577  ins_encode( Push_Reg_FPR(src2),
10578              subFPR_divFPR_encode(src1,src3),
10579              Pop_Reg_FPR(dst) );
10580  ins_pipe( fpu_reg_reg_reg_reg );
10581%}
10582
10583// Spill to obtain 24-bit precision
// x87 float divide for 24-bit mode (UseSSE == 0): push $src1, divide by $src2,
// then pop the quotient to the float stack slot $dst.
10584instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10585  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10586  match(Set dst (DivF src1 src2));
10587
10588  format %{ "FDIV   $dst,$src1,$src2" %}
10589  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
10590  ins_encode( Push_Reg_FPR(src1),
10591              OpcReg_FPR(src2),
10592              Pop_Mem_FPR(dst) );
10593  ins_pipe( fpu_mem_reg_reg );
10594%}
10595//
10596// This instruction does not round to 24-bits
// Two-address x87 float divide (dst = dst / src) for UseSSE == 0 without
// 24-bit instruction selection: push $src, then emit the DE-opcode form
// addressed at $dst.
10597instruct divFPR_reg(regFPR dst, regFPR src) %{
10598  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10599  match(Set dst (DivF dst src));
10600
10601  format %{ "FDIV   $dst,$src" %}
10602  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10603  ins_encode( Push_Reg_FPR(src),
10604              OpcP, RegOpc(dst) );
10605  ins_pipe( fpu_reg_reg );
10606%}
10607
10608
10609// Spill to obtain 24-bit precision
// x87 float remainder for 24-bit mode (UseSSE == 0).  Reuses the double
// (DPR) mod encodings to set up the operands and run the remainder sequence,
// then pops the result to the float stack slot $dst.
10610instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10611  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10612  match(Set dst (ModF src1 src2));
10613  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10614
10615  format %{ "FMOD   $dst,$src1,$src2" %}
10616  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10617              emitModDPR(),
10618              Push_Result_Mod_DPR(src2),
10619              Pop_Mem_FPR(dst));
10620  ins_pipe( pipe_slow );
10621%}
10622//
10623// This instruction does not round to 24-bits
// Two-address x87 float remainder (dst = dst % src) for UseSSE == 0 without
// 24-bit instruction selection.  Same encoding chain as modFPR24_reg, except
// the result is popped into the register $dst.
10624instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10625  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10626  match(Set dst (ModF dst src));
10627  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10628
10629  format %{ "FMOD   $dst,$src" %}
10630  ins_encode(Push_Reg_Mod_DPR(dst, src),
10631              emitModDPR(),
10632              Push_Result_Mod_DPR(src),
10633              Pop_Reg_FPR(dst));
10634  ins_pipe( pipe_slow );
10635%}
10636
// SSE float remainder (UseSSE >= 1).  Per the format text, both XMM inputs are
// bounced through a 4-byte stack temporary onto the x87 stack, FPREM is looped
// until the status word (read via AX/SAHF) no longer sets PF, and the result is
// moved back through the stack temporary into the XMM $dst.  The leftover x87
// stack entry is popped at the end.  EAX and the flags are killed.
10637instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10638  predicate(UseSSE>=1);
10639  match(Set dst (ModF src0 src1));
10640  effect(KILL rax, KILL cr);
10641  format %{ "SUB    ESP,4\t # FMOD\n"
10642          "\tMOVSS  [ESP+0],$src1\n"
10643          "\tFLD_S  [ESP+0]\n"
10644          "\tMOVSS  [ESP+0],$src0\n"
10645          "\tFLD_S  [ESP+0]\n"
10646     "loop:\tFPREM\n"
10647          "\tFWAIT\n"
10648          "\tFNSTSW AX\n"
10649          "\tSAHF\n"
10650          "\tJP     loop\n"
10651          "\tFSTP_S [ESP+0]\n"
10652          "\tMOVSS  $dst,[ESP+0]\n"
10653          "\tADD    ESP,4\n"
10654          "\tFSTP   ST0\t # Restore FPU Stack"
10655    %}
10656  ins_cost(250);
10657  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10658  ins_pipe( pipe_slow );
10659%}
10660
10661
10662//----------Arithmetic Conversion Instructions---------------------------------
10663// The conversion operations are all sorted alphabetically.  Please keep it that way!
10664
// RoundFloat for the x87 path (UseSSE == 0): stores the FPU register $src to
// the float stack slot $dst.  Also used as the expansion target of
// convDPR2FPR_reg.
10665instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10666  predicate(UseSSE==0);
10667  match(Set dst (RoundFloat src));
10668  ins_cost(125);
10669  format %{ "FST_S  $dst,$src\t# F-round" %}
10670  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10671  ins_pipe( fpu_mem_reg );
10672%}
10673
// RoundDouble for configurations where doubles live on the x87 stack
// (UseSSE <= 1): stores the FPU register $src to the double stack slot $dst.
// Also used as the expansion target of convFPR2D_reg.
10674instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10675  predicate(UseSSE<=1);
10676  match(Set dst (RoundDouble src));
10677  ins_cost(125);
10678  format %{ "FST_D  $dst,$src\t# D-round" %}
10679  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10680  ins_pipe( fpu_mem_reg );
10681%}
10682
10683// Force rounding to 24-bit precision and 8-bit exponent
// ConvD2F on the pure x87 path (UseSSE == 0).  It has no encoding of its own:
// it expands to roundFloat_mem_reg, i.e. a single-precision store of $src to
// the float stack slot $dst.
10684instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10685  predicate(UseSSE==0);
10686  match(Set dst (ConvD2F src));
10687  format %{ "FST_S  $dst,$src\t# F-round" %}
10688  expand %{
10689    roundFloat_mem_reg(dst,src);
10690  %}
10691%}
10692
10693// Force rounding to 24-bit precision and 8-bit exponent
// ConvD2F when UseSSE == 1: the double is on the x87 stack and the float result
// goes to an XMM register.  The value is stored as a single to a 4-byte stack
// temporary and reloaded with MOVSS.  Flags are killed because ESP is adjusted
// with SUB/ADD.
10694instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10695  predicate(UseSSE==1);
10696  match(Set dst (ConvD2F src));
10697  effect( KILL cr );
10698  format %{ "SUB    ESP,4\n\t"
10699            "FST_S  [ESP],$src\t# F-round\n\t"
10700            "MOVSS  $dst,[ESP]\n\t"
10701            "ADD ESP,4" %}
10702  ins_encode %{
10703    __ subptr(rsp, 4);
    // NOTE(review): FPR1L_enc is presumably the top-of-stack register.  When
    // $src is some other register, a copy is pushed and then store-popped;
    // otherwise it is stored in place without popping.
10704    if ($src$$reg != FPR1L_enc) {
10705      __ fld_s($src$$reg-1);
10706      __ fstp_s(Address(rsp, 0));
10707    } else {
10708      __ fst_s(Address(rsp, 0));
10709    }
10710    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10711    __ addptr(rsp, 4);
10712  %}
10713  ins_pipe( pipe_slow );
10714%}
10715
10716// Force rounding double precision to single precision
// ConvD2F when both types live in XMM registers (UseSSE >= 2): a single
// CVTSD2SS from $src into $dst.
10717instruct convD2F_reg(regF dst, regD src) %{
10718  predicate(UseSSE>=2);
10719  match(Set dst (ConvD2F src));
10720  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10721  ins_encode %{
10722    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10723  %}
10724  ins_pipe( pipe_slow );
10725%}
10726
// ConvF2D on the pure x87 path (UseSSE == 0): both source and destination are
// FPU registers, and the conversion is encoded as a register-to-register move
// (Pop_Reg_Reg_DPR).
// NOTE(review): the format text says FST_S although both operands are
// registers -- presumably cosmetic; confirm against Pop_Reg_Reg_DPR.
10727instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10728  predicate(UseSSE==0);
10729  match(Set dst (ConvF2D src));
10730  format %{ "FST_S  $dst,$src\t# D-round" %}
10731  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10732  ins_pipe( fpu_reg_reg );
10733%}
10734
// ConvF2D for UseSSE == 1 with an x87 float source and a double stack-slot
// destination.  It has no encoding of its own: it expands to
// roundDouble_mem_reg, i.e. a double-precision store of $src to $dst.
10735instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10736  predicate(UseSSE==1);
10737  match(Set dst (ConvF2D src));
10738  format %{ "FST_D  $dst,$src\t# D-round" %}
10739  expand %{
10740    roundDouble_mem_reg(dst,src);
10741  %}
10742%}
10743
// ConvF2D when UseSSE == 1: the float is in an XMM register and the double
// result lives on the x87 stack.  The value is spilled to a 4-byte stack
// temporary with MOVSS, loaded onto the FPU stack as a single, and popped into
// $dst.  Flags are killed because ESP is adjusted with SUB/ADD.
10744instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10745  predicate(UseSSE==1);
10746  match(Set dst (ConvF2D src));
10747  effect( KILL cr );
10748  format %{ "SUB    ESP,4\n\t"
10749            "MOVSS  [ESP],$src\n\t"
10750            "FLD_S  [ESP]\n\t"
10751            "ADD    ESP,4\n\t"
10752            "FSTP   $dst\t# D-round" %}
10753  ins_encode %{
10754    __ subptr(rsp, 4);
10755    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10756    __ fld_s(Address(rsp, 0));
10757    __ addptr(rsp, 4);
    // Pop the widened value from the FPU stack top into the destination register.
10758    __ fstp_d($dst$$reg);
10759  %}
10760  ins_pipe( pipe_slow );
10761%}
10762
// ConvF2D when both types live in XMM registers (UseSSE >= 2): a single
// CVTSS2SD from $src into $dst.
10763instruct convF2D_reg(regD dst, regF src) %{
10764  predicate(UseSSE>=2);
10765  match(Set dst (ConvF2D src));
10766  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10767  ins_encode %{
10768    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10769  %}
10770  ins_pipe( pipe_slow );
10771%}
10772
10773// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// x87 path (UseSSE <= 1).  Per the format text: the value is pushed, the FPU
// control word is switched to truncation, FISTP stores a 32-bit int to the
// stack, the control word is restored, and the int is popped into EAX.  If the
// result equals 0x80000000 the value is reloaded and d2i_wrapper is called.
// The result is fixed to EAX; EDX (tmp) and the flags are killed.
10774instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10775  predicate(UseSSE<=1);
10776  match(Set dst (ConvD2I src));
10777  effect( KILL tmp, KILL cr );
10778  format %{ "FLD    $src\t# Convert double to int \n\t"
10779            "FLDCW  trunc mode\n\t"
10780            "SUB    ESP,4\n\t"
10781            "FISTp  [ESP + #0]\n\t"
10782            "FLDCW  std/24-bit mode\n\t"
10783            "POP    EAX\n\t"
10784            "CMP    EAX,0x80000000\n\t"
10785            "JNE,s  fast\n\t"
10786            "FLD_D  $src\n\t"
10787            "CALL   d2i_wrapper\n"
10788      "fast:" %}
10789  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10790  ins_pipe( pipe_slow );
10791%}
10792
10793// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// SSE2 path (UseSSE >= 2).  The fast path is a single truncating CVTTSD2SI.
// When the result equals 0x80000000 the double is copied through an 8-byte
// stack temporary onto the x87 stack and the d2i_wrapper stub is called.
// The result is fixed to EAX; EDX (tmp) and the flags are killed.
10794instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10795  predicate(UseSSE>=2);
10796  match(Set dst (ConvD2I src));
10797  effect( KILL tmp, KILL cr );
10798  format %{ "CVTTSD2SI $dst, $src\n\t"
10799            "CMP    $dst,0x80000000\n\t"
10800            "JNE,s  fast\n\t"
10801            "SUB    ESP, 8\n\t"
10802            "MOVSD  [ESP], $src\n\t"
10803            "FLD_D  [ESP]\n\t"
10804            "ADD    ESP, 8\n\t"
10805            "CALL   d2i_wrapper\n"
10806      "fast:" %}
10807  ins_encode %{
10808    Label fast;
10809    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
    // Any result other than the 0x80000000 sentinel is final.
10810    __ cmpl($dst$$Register, 0x80000000);
10811    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original double to the stub on the x87 stack.
10812    __ subptr(rsp, 8);
10813    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10814    __ fld_d(Address(rsp, 0));
10815    __ addptr(rsp, 8);
10816    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10817    __ bind(fast);
10818  %}
10819  ins_pipe( pipe_slow );
10820%}
10821
// ConvD2L on the x87 path (UseSSE <= 1); the long result is produced in the
// EDX:EAX pair.  Per the format text: the value is pushed, the FPU control word
// is switched to truncation, FISTP stores a 64-bit int to the stack, the
// control word is restored, and the halves are popped into EAX and EDX.  Only
// when EDX == 0x80000000 and EAX == 0 is the value reloaded and d2l_wrapper
// called.  Flags are killed.
10822instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10823  predicate(UseSSE<=1);
10824  match(Set dst (ConvD2L src));
10825  effect( KILL cr );
10826  format %{ "FLD    $src\t# Convert double to long\n\t"
10827            "FLDCW  trunc mode\n\t"
10828            "SUB    ESP,8\n\t"
10829            "FISTp  [ESP + #0]\n\t"
10830            "FLDCW  std/24-bit mode\n\t"
10831            "POP    EAX\n\t"
10832            "POP    EDX\n\t"
10833            "CMP    EDX,0x80000000\n\t"
10834            "JNE,s  fast\n\t"
10835            "TEST   EAX,EAX\n\t"
10836            "JNE,s  fast\n\t"
10837            "FLD    $src\n\t"
10838            "CALL   d2l_wrapper\n"
10839      "fast:" %}
10840  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10841  ins_pipe( pipe_slow );
10842%}
10843
10844// XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvD2L for SSE2 (UseSSE >= 2); the long result is produced in EDX:EAX.
// The double is copied through an 8-byte stack temporary onto the x87 stack,
// converted with FISTP under the truncating control word, and popped into
// EAX/EDX.  If the result is exactly 0x80000000:00000000 the double is
// reloaded onto the x87 stack and the d2l_wrapper stub is called.
10845instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10846  predicate (UseSSE>=2);
10847  match(Set dst (ConvD2L src));
10848  effect( KILL cr );
10849  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10850            "MOVSD  [ESP],$src\n\t"
10851            "FLD_D  [ESP]\n\t"
10852            "FLDCW  trunc mode\n\t"
10853            "FISTp  [ESP + #0]\n\t"
10854            "FLDCW  std/24-bit mode\n\t"
10855            "POP    EAX\n\t"
10856            "POP    EDX\n\t"
10857            "CMP    EDX,0x80000000\n\t"
10858            "JNE,s  fast\n\t"
10859            "TEST   EAX,EAX\n\t"
10860            "JNE,s  fast\n\t"
10861            "SUB    ESP,8\n\t"
10862            "MOVSD  [ESP],$src\n\t"
10863            "FLD_D  [ESP]\n\t"
10864            "ADD    ESP,8\n\t"
10865            "CALL   d2l_wrapper\n"
10866      "fast:" %}
10867  ins_encode %{
10868    Label fast;
    // Move the XMM double onto the x87 stack via an 8-byte stack temporary.
10869    __ subptr(rsp, 8);
10870    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10871    __ fld_d(Address(rsp, 0));
    // Convert with the truncating control word; the 64-bit result overwrites
    // the same stack temporary.
10872    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10873    __ fistp_d(Address(rsp, 0));
10874    // Restore the rounding mode, mask the exception
10875    if (Compile::current()->in_24_bit_fp_mode()) {
10876      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10877    } else {
10878      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10879    }
10880    // Load the converted long, adjust CPU stack
10881    __ pop(rax);
10882    __ pop(rdx);
    // Only the exact pattern EDX == 0x80000000, EAX == 0 takes the slow path.
10883    __ cmpl(rdx, 0x80000000);
10884    __ jccb(Assembler::notEqual, fast);
10885    __ testl(rax, rax);
10886    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the original double onto the x87 stack for the stub.
10887    __ subptr(rsp, 8);
10888    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10889    __ fld_d(Address(rsp, 0));
10890    __ addptr(rsp, 8);
10891    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10892    __ bind(fast);
10893  %}
10894  ins_pipe( pipe_slow );
10895%}
10896
10897// Convert a float to an int.  Java semantics require we do complex
10898// manglations in the corner cases.  So we set the rounding mode to
10899// 'zero', store the darned float down as an int, and reset the
10900// rounding mode to 'nearest'.  The hardware stores a flag value down
10901// if we would overflow or converted a NAN; we check for this
10902// and go the slow path if needed.
// x87 path (UseSSE == 0).  The result is fixed to EAX; EDX (tmp) and the flags
// are killed.  The slow path calls d2i_wrapper, as shown in the format text.
10903instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10904  predicate(UseSSE==0);
10905  match(Set dst (ConvF2I src));
10906  effect( KILL tmp, KILL cr );
10907  format %{ "FLD    $src\t# Convert float to int \n\t"
10908            "FLDCW  trunc mode\n\t"
10909            "SUB    ESP,4\n\t"
10910            "FISTp  [ESP + #0]\n\t"
10911            "FLDCW  std/24-bit mode\n\t"
10912            "POP    EAX\n\t"
10913            "CMP    EAX,0x80000000\n\t"
10914            "JNE,s  fast\n\t"
10915            "FLD    $src\n\t"
10916            "CALL   d2i_wrapper\n"
10917      "fast:" %}
10918  // DPR2I_encoding works for FPR2I
10919  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10920  ins_pipe( pipe_slow );
10921%}
10922
10923// Convert a float in xmm to an int reg.
// SSE path (UseSSE >= 1).  The fast path is a single truncating CVTTSS2SI.
// When the result equals 0x80000000 the float is copied through a 4-byte stack
// temporary onto the x87 stack and the d2i_wrapper stub is called.
// The result is fixed to EAX; EDX (tmp) and the flags are killed.
10924instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10925  predicate(UseSSE>=1);
10926  match(Set dst (ConvF2I src));
10927  effect( KILL tmp, KILL cr );
10928  format %{ "CVTTSS2SI $dst, $src\n\t"
10929            "CMP    $dst,0x80000000\n\t"
10930            "JNE,s  fast\n\t"
10931            "SUB    ESP, 4\n\t"
10932            "MOVSS  [ESP], $src\n\t"
10933            "FLD    [ESP]\n\t"
10934            "ADD    ESP, 4\n\t"
10935            "CALL   d2i_wrapper\n"
10936      "fast:" %}
10937  ins_encode %{
10938    Label fast;
10939    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
    // Any result other than the 0x80000000 sentinel is final.
10940    __ cmpl($dst$$Register, 0x80000000);
10941    __ jccb(Assembler::notEqual, fast);
    // Slow path: hand the original float to the stub on the x87 stack.
10942    __ subptr(rsp, 4);
10943    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10944    __ fld_s(Address(rsp, 0));
10945    __ addptr(rsp, 4);
10946    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10947    __ bind(fast);
10948  %}
10949  ins_pipe( pipe_slow );
10950%}
10951
// ConvF2L on the pure x87 path (UseSSE == 0); the long result is produced in
// EDX:EAX.  Per the format text the sequence mirrors the double-to-long rule:
// truncating FISTP to an 8-byte stack temporary, pop into EAX/EDX, and call
// d2l_wrapper only when EDX == 0x80000000 and EAX == 0.  Flags are killed.
10952instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10953  predicate(UseSSE==0);
10954  match(Set dst (ConvF2L src));
10955  effect( KILL cr );
10956  format %{ "FLD    $src\t# Convert float to long\n\t"
10957            "FLDCW  trunc mode\n\t"
10958            "SUB    ESP,8\n\t"
10959            "FISTp  [ESP + #0]\n\t"
10960            "FLDCW  std/24-bit mode\n\t"
10961            "POP    EAX\n\t"
10962            "POP    EDX\n\t"
10963            "CMP    EDX,0x80000000\n\t"
10964            "JNE,s  fast\n\t"
10965            "TEST   EAX,EAX\n\t"
10966            "JNE,s  fast\n\t"
10967            "FLD    $src\n\t"
10968            "CALL   d2l_wrapper\n"
10969      "fast:" %}
10970  // DPR2L_encoding works for FPR2L
10971  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10972  ins_pipe( pipe_slow );
10973%}
10974
10975// XMM lacks a float/double->long conversion, so use the old FPU stack.
// ConvF2L for SSE (UseSSE >= 1); the long result is produced in EDX:EAX.
// The float is copied through the stack onto the x87 stack, converted with
// FISTP under the truncating control word, and popped into EAX/EDX.  If the
// result is exactly 0x80000000:00000000 the float is reloaded onto the x87
// stack and the d2l_wrapper stub is called.
10976instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10977  predicate (UseSSE>=1);
10978  match(Set dst (ConvF2L src));
10979  effect( KILL cr );
10980  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10981            "MOVSS  [ESP],$src\n\t"
10982            "FLD_S  [ESP]\n\t"
10983            "FLDCW  trunc mode\n\t"
10984            "FISTp  [ESP + #0]\n\t"
10985            "FLDCW  std/24-bit mode\n\t"
10986            "POP    EAX\n\t"
10987            "POP    EDX\n\t"
10988            "CMP    EDX,0x80000000\n\t"
10989            "JNE,s  fast\n\t"
10990            "TEST   EAX,EAX\n\t"
10991            "JNE,s  fast\n\t"
10992            "SUB    ESP,4\t# Convert float to long\n\t"
10993            "MOVSS  [ESP],$src\n\t"
10994            "FLD_S  [ESP]\n\t"
10995            "ADD    ESP,4\n\t"
10996            "CALL   d2l_wrapper\n"
10997      "fast:" %}
10998  ins_encode %{
10999    Label fast;
    // Reserve 8 bytes: only 4 are needed for the float going in, but the
    // 64-bit FISTP result is written back to the same location.
11000    __ subptr(rsp, 8);
11001    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11002    __ fld_s(Address(rsp, 0));
11003    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11004    __ fistp_d(Address(rsp, 0));
11005    // Restore the rounding mode, mask the exception
11006    if (Compile::current()->in_24_bit_fp_mode()) {
11007      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11008    } else {
11009      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11010    }
11011    // Load the converted long, adjust CPU stack
11012    __ pop(rax);
11013    __ pop(rdx);
    // Only the exact pattern EDX == 0x80000000, EAX == 0 takes the slow path.
11014    __ cmpl(rdx, 0x80000000);
11015    __ jccb(Assembler::notEqual, fast);
11016    __ testl(rax, rax);
11017    __ jccb(Assembler::notEqual, fast);
    // Slow path: reload the original float onto the x87 stack for the stub.
11018    __ subptr(rsp, 4);
11019    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11020    __ fld_s(Address(rsp, 0));
11021    __ addptr(rsp, 4);
11022    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11023    __ bind(fast);
11024  %}
11025  ins_pipe( pipe_slow );
11026%}
11027
// Int -> double on the x87 FPU (UseSSE<=1): FILD the int from its stack slot,
// then FSTP into the destination FPU register.
11028instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11029  predicate( UseSSE<=1 );
11030  match(Set dst (ConvI2D src));
11031  format %{ "FILD   $src\n\t"
11032            "FSTP   $dst" %}
11033  opcode(0xDB, 0x0);  /* DB /0 */
11034  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11035  ins_pipe( fpu_reg_mem );
11036%}
11037
// Int register -> double in XMM using CVTSI2SD.
// Selected when UseSSE>=2 and UseXmmI2D is off (convXI2D_reg covers the
// UseXmmI2D case).
11038instruct convI2D_reg(regD dst, rRegI src) %{
11039  predicate( UseSSE>=2 && !UseXmmI2D );
11040  match(Set dst (ConvI2D src));
11041  format %{ "CVTSI2SD $dst,$src" %}
11042  ins_encode %{
11043    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11044  %}
11045  ins_pipe( pipe_slow );
11046%}
11047
// Int loaded from memory -> double in XMM (UseSSE>=2). The LoadI is folded
// into the match rule so CVTSI2SD takes the memory operand directly.
11048instruct convI2D_mem(regD dst, memory mem) %{
11049  predicate( UseSSE>=2 );
11050  match(Set dst (ConvI2D (LoadI mem)));
11051  format %{ "CVTSI2SD $dst,$mem" %}
11052  ins_encode %{
11053    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11054  %}
11055  ins_pipe( pipe_slow );
11056%}
11057
// Int register -> double in XMM when UseSSE>=2 and UseXmmI2D is set:
// the int is first moved into the XMM register with MOVD and then converted
// in place with CVTDQ2PD.
11058instruct convXI2D_reg(regD dst, rRegI src)
11059%{
11060  predicate( UseSSE>=2 && UseXmmI2D );
11061  match(Set dst (ConvI2D src));
11062
11063  format %{ "MOVD  $dst,$src\n\t"
11064            "CVTDQ2PD $dst,$dst\t# i2d" %}
11065  ins_encode %{
11066    __ movdl($dst$$XMMRegister, $src$$Register);
11067    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11068  %}
11069  ins_pipe(pipe_slow); // XXX
11070%}
11071
// Int loaded from memory -> double in an x87 register: FILD directly from the
// memory operand, then FSTP into dst. Applies when UseSSE<=1 and the current
// compile is not selecting 24-bit instructions.
11072instruct convI2DPR_mem(regDPR dst, memory mem) %{
11073  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11074  match(Set dst (ConvI2D (LoadI mem)));
11075  format %{ "FILD   $mem\n\t"
11076            "FSTP   $dst" %}
11077  opcode(0xDB);      /* DB /0 */
11078  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11079              Pop_Reg_DPR(dst));
11080  ins_pipe( fpu_reg_mem );
11081%}
11082
11083// Convert a byte to a float; no rounding step needed.
// The predicate recognizes the "byte" case structurally: the ConvI2F input
// must be an AndI whose second input is the constant 255. x87 only (UseSSE==0).
11084instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11085  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11086  match(Set dst (ConvI2F src));
11087  format %{ "FILD   $src\n\t"
11088            "FSTP   $dst" %}
11089
11090  opcode(0xDB, 0x0);  /* DB /0 */
11091  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11092  ins_pipe( fpu_reg_mem );
11093%}
11094
11095// In 24-bit mode, force exponent rounding by storing back out
// Int (stack slot) -> float (stack slot) on x87 when the compile selects
// 24-bit instructions: FILD the int, then FSTP_S to the float stack slot.
11096instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11097  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11098  match(Set dst (ConvI2F src));
11099  ins_cost(200);
11100  format %{ "FILD   $src\n\t"
11101            "FSTP_S $dst" %}
11102  opcode(0xDB, 0x0);  /* DB /0 */
11103  ins_encode( Push_Mem_I(src),
11104              Pop_Mem_FPR(dst));
11105  ins_pipe( fpu_mem_mem );
11106%}
11107
11108// In 24-bit mode, force exponent rounding by storing back out
// Same as convI2FPR_SSF, but the int comes from a folded LoadI so FILD reads
// the memory operand directly.
11109instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11110  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11111  match(Set dst (ConvI2F (LoadI mem)));
11112  ins_cost(200);
11113  format %{ "FILD   $mem\n\t"
11114            "FSTP_S $dst" %}
11115  opcode(0xDB);  /* DB /0 */
11116  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11117              Pop_Mem_FPR(dst));
11118  ins_pipe( fpu_mem_mem );
11119%}
11120
11121// This instruction does not round to 24-bits
// Int (stack slot) -> float in an x87 register; used when UseSSE==0 and the
// compile is not selecting 24-bit instructions.
11122instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11123  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11124  match(Set dst (ConvI2F src));
11125  format %{ "FILD   $src\n\t"
11126            "FSTP   $dst" %}
11127  opcode(0xDB, 0x0);  /* DB /0 */
11128  ins_encode( Push_Mem_I(src),
11129              Pop_Reg_FPR(dst));
11130  ins_pipe( fpu_reg_mem );
11131%}
11132
11133// This instruction does not round to 24-bits
// Memory-operand form of convI2FPR_reg: the LoadI is folded into the match
// rule and FILD reads the int straight from memory.
11134instruct convI2FPR_mem(regFPR dst, memory mem) %{
11135  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11136  match(Set dst (ConvI2F (LoadI mem)));
11137  format %{ "FILD   $mem\n\t"
11138            "FSTP   $dst" %}
11139  opcode(0xDB);      /* DB /0 */
11140  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11141              Pop_Reg_FPR(dst));
11142  ins_pipe( fpu_reg_mem );
11143%}
11144
11145// Convert an int to a float in xmm; no rounding step needed.
// Uses CVTSI2SS. Because && binds tighter than ||, the predicate reads as
// UseSSE==1 || (UseSSE>=2 && !UseXmmI2F); convXI2F_reg handles the remaining
// UseSSE>=2 && UseXmmI2F case.
11146instruct convI2F_reg(regF dst, rRegI src) %{
11147  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11148  match(Set dst (ConvI2F src));
11149  format %{ "CVTSI2SS $dst, $src" %}
11150  ins_encode %{
11151    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11152  %}
11153  ins_pipe( pipe_slow );
11154%}
11155
// Int register -> float in XMM when UseSSE>=2 and UseXmmI2F is set:
// MOVD the int into the XMM register, then convert in place with CVTDQ2PS.
11156 instruct convXI2F_reg(regF dst, rRegI src)
11157%{
11158  predicate( UseSSE>=2 && UseXmmI2F );
11159  match(Set dst (ConvI2F src));
11160
11161  format %{ "MOVD  $dst,$src\n\t"
11162            "CVTDQ2PS $dst,$dst\t# i2f" %}
11163  ins_encode %{
11164    __ movdl($dst$$XMMRegister, $src$$Register);
11165    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11166  %}
11167  ins_pipe(pipe_slow); // XXX
11168%}
11169
// Sign-extending int -> long. Per the format text, src is copied into both
// halves of dst and the high half is then shifted right arithmetically by 31.
// The encoding is delegated to convert_int_long; the flags register is killed.
11170instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11171  match(Set dst (ConvI2L src));
11172  effect(KILL cr);
11173  ins_cost(375);
11174  format %{ "MOV    $dst.lo,$src\n\t"
11175            "MOV    $dst.hi,$src\n\t"
11176            "SAR    $dst.hi,31" %}
11177  ins_encode(convert_int_long(dst,src));
11178  ins_pipe( ialu_reg_reg_long );
11179%}
11180
11181// Zero-extend convert int to long
// Matches (AndL (ConvI2L src) mask) with mask an immL_32bits operand, so the
// sign extension can be skipped: copy src to the low half and XOR the high
// half with itself (opcode 0x33). The flags register is killed.
11182instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11183  match(Set dst (AndL (ConvI2L src) mask) );
11184  effect( KILL flags );
11185  ins_cost(250);
11186  format %{ "MOV    $dst.lo,$src\n\t"
11187            "XOR    $dst.hi,$dst.hi" %}
11188  opcode(0x33); // XOR
11189  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11190  ins_pipe( ialu_reg_reg_long );
11191%}
11192
11193// Zero-extend long
// Matches (AndL src mask) with mask an immL_32bits operand: the low half of
// src is copied to dst and the high half of dst is XORed with itself
// (opcode 0x33). The flags register is killed.
// The format text ends without a trailing "\n\t", matching the other rules in
// this file, so the printed listing has no dangling line break.
11194instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11195  match(Set dst (AndL src mask) );
11196  effect( KILL flags );
11197  ins_cost(250);
11198  format %{ "MOV    $dst.lo,$src.lo\n\t"
11199            "XOR    $dst.hi,$dst.hi" %}
11200  opcode(0x33); // XOR
11201  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11202  ins_pipe( ialu_reg_reg_long );
11203%}
11204
// Long -> double on x87 (UseSSE<=1). Per the format text the two halves of
// src are pushed, loaded with FILD, and the result is stored to a double
// stack slot with FSTP_D (tagged "D-round"). The flags register is killed.
11205instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11206  predicate (UseSSE<=1);
11207  match(Set dst (ConvL2D src));
11208  effect( KILL cr );
11209  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11210            "PUSH   $src.lo\n\t"
11211            "FILD   ST,[ESP + #0]\n\t"
11212            "ADD    ESP,8\n\t"
11213            "FSTP_D $dst\t# D-round" %}
11214  opcode(0xDF, 0x5);  /* DF /5 */
11215  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11216  ins_pipe( pipe_slow );
11217%}
11218
// Long -> double with an XMM destination (UseSSE>=2). Per the format text the
// conversion still goes through the x87 FPU: push both halves, FILD/FSTP_D in
// place on the stack, then MOVSD the result into dst and release the stack.
11219instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11220  predicate (UseSSE>=2);
11221  match(Set dst (ConvL2D src));
11222  effect( KILL cr );
11223  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11224            "PUSH   $src.lo\n\t"
11225            "FILD_D [ESP]\n\t"
11226            "FSTP_D [ESP]\n\t"
11227            "MOVSD  $dst,[ESP]\n\t"
11228            "ADD    ESP,8" %}
11229  opcode(0xDF, 0x5);  /* DF /5 */
11230  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11231  ins_pipe( pipe_slow );
11232%}
11233
// Long -> float with an XMM destination (UseSSE>=1). Per the format text:
// push both halves, FILD the 64-bit value, FSTP_S it back as a single float,
// MOVSS it into dst, then release the 8 bytes of stack.
11234instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11235  predicate (UseSSE>=1);
11236  match(Set dst (ConvL2F src));
11237  effect( KILL cr );
11238  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11239            "PUSH   $src.lo\n\t"
11240            "FILD_D [ESP]\n\t"
11241            "FSTP_S [ESP]\n\t"
11242            "MOVSS  $dst,[ESP]\n\t"
11243            "ADD    ESP,8" %}
11244  opcode(0xDF, 0x5);  /* DF /5 */
11245  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11246  ins_pipe( pipe_slow );
11247%}
11248
// Long -> float with the result stored to a float stack slot via FSTP_S
// (tagged "F-round" in the format text). The flags register is killed.
// NOTE(review): unlike convL2F_reg (predicate UseSSE>=1) this rule carries no
// predicate, so it is a candidate for ConvL2F under every UseSSE setting —
// confirm that relying on operand types/cost for selection is intentional.
11249instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11250  match(Set dst (ConvL2F src));
11251  effect( KILL cr );
11252  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11253            "PUSH   $src.lo\n\t"
11254            "FILD   ST,[ESP + #0]\n\t"
11255            "ADD    ESP,8\n\t"
11256            "FSTP_S $dst\t# F-round" %}
11257  opcode(0xDF, 0x5);  /* DF /5 */
11258  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11259  ins_pipe( pipe_slow );
11260%}
11261
// Long -> int: copies only the low half of src into dst (format:
// "MOV $dst,$src.lo"); encoding is delegated to enc_CopyL_Lo.
11262instruct convL2I_reg( rRegI dst, eRegL src ) %{
11263  match(Set dst (ConvL2I src));
11264  effect( DEF dst, USE src );
11265  format %{ "MOV    $dst,$src.lo" %}
11266  ins_encode(enc_CopyL_Lo(dst,src));
11267  ins_pipe( ialu_reg_reg );
11268%}
11269
// MoveF2I with the float in a stack slot and the int result in a register:
// a plain 32-bit MOV from [rsp + src displacement]; no conversion instruction.
11270instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11271  match(Set dst (MoveF2I src));
11272  effect( DEF dst, USE src );
11273  ins_cost(100);
11274  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11275  ins_encode %{
11276    __ movl($dst$$Register, Address(rsp, $src$$disp));
11277  %}
11278  ins_pipe( ialu_reg_mem );
11279%}
11280
// MoveF2I from an x87 float register to an int stack slot (UseSSE==0):
// the float is stored to the slot with FST_S (per the format text).
11281instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11282  predicate(UseSSE==0);
11283  match(Set dst (MoveF2I src));
11284  effect( DEF dst, USE src );
11285
11286  ins_cost(125);
11287  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11288  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11289  ins_pipe( fpu_mem_reg );
11290%}
11291
// MoveF2I from an XMM float register to an int stack slot (UseSSE>=1):
// stores the float to [rsp + dst displacement] with movflt.
11292instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11293  predicate(UseSSE>=1);
11294  match(Set dst (MoveF2I src));
11295  effect( DEF dst, USE src );
11296
11297  ins_cost(95);
11298  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11299  ins_encode %{
11300    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11301  %}
11302  ins_pipe( pipe_slow );
11303%}
11304
// MoveF2I directly from an XMM register to an int register with MOVD
// (UseSSE>=2). Lowest ins_cost (85) of the MoveF2I rules in this file.
11305instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11306  predicate(UseSSE>=2);
11307  match(Set dst (MoveF2I src));
11308  effect( DEF dst, USE src );
11309  ins_cost(85);
11310  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11311  ins_encode %{
11312    __ movdl($dst$$Register, $src$$XMMRegister);
11313  %}
11314  ins_pipe( pipe_slow );
11315%}
11316
// MoveI2F from an int register to a float stack slot: a plain 32-bit MOV to
// [rsp + dst displacement]; no conversion instruction and no predicate.
11317instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11318  match(Set dst (MoveI2F src));
11319  effect( DEF dst, USE src );
11320
11321  ins_cost(100);
11322  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11323  ins_encode %{
11324    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11325  %}
11326  ins_pipe( ialu_mem_reg );
11327%}
11328
11329
// MoveI2F from an int stack slot into an x87 float register (UseSSE==0):
// the slot is loaded as a 32-bit real with FLD_S (D9 /0) and popped into dst.
11330instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11331  predicate(UseSSE==0);
11332  match(Set dst (MoveI2F src));
11333  effect(DEF dst, USE src);
11334
11335  ins_cost(125);
11336  format %{ "FLD_S  $src\n\t"
11337            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11338  opcode(0xD9);               /* D9 /0, FLD m32real */
11339  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11340              Pop_Reg_FPR(dst) );
11341  ins_pipe( fpu_reg_mem );
11342%}
11343
// MoveI2F from an int stack slot into an XMM float register (UseSSE>=1):
// loads [rsp + src displacement] with movflt.
11344instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11345  predicate(UseSSE>=1);
11346  match(Set dst (MoveI2F src));
11347  effect( DEF dst, USE src );
11348
11349  ins_cost(95);
11350  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11351  ins_encode %{
11352    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11353  %}
11354  ins_pipe( pipe_slow );
11355%}
11356
// MoveI2F directly from an int register to an XMM register with MOVD
// (UseSSE>=2). Lowest ins_cost (85) of the MoveI2F rules in this file.
11357instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11358  predicate(UseSSE>=2);
11359  match(Set dst (MoveI2F src));
11360  effect( DEF dst, USE src );
11361
11362  ins_cost(85);
11363  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11364  ins_encode %{
11365    __ movdl($dst$$XMMRegister, $src$$Register);
11366  %}
11367  ins_pipe( pipe_slow );
11368%}
11369
// MoveD2L from a double stack slot into a long register pair: two 32-bit MOV
// loads (opcode 0x8B each), the low half from $src and the high half from
// $src+4 as shown in the format text.
11370instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11371  match(Set dst (MoveD2L src));
11372  effect(DEF dst, USE src);
11373
11374  ins_cost(250);
11375  format %{ "MOV    $dst.lo,$src\n\t"
11376            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11377  opcode(0x8B, 0x8B);
11378  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11379  ins_pipe( ialu_mem_long_reg );
11380%}
11381
// MoveD2L from an x87 double register to a long stack slot (UseSSE<=1):
// the double is stored to the slot with FST_D (per the format text).
11382instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11383  predicate(UseSSE<=1);
11384  match(Set dst (MoveD2L src));
11385  effect(DEF dst, USE src);
11386
11387  ins_cost(125);
11388  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11389  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11390  ins_pipe( fpu_mem_reg );
11391%}
11392
// MoveD2L from an XMM double register to a long stack slot (UseSSE>=2):
// stores the double to [rsp + dst displacement] with movdbl.
11393instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11394  predicate(UseSSE>=2);
11395  match(Set dst (MoveD2L src));
11396  effect(DEF dst, USE src);
11397  ins_cost(95);
11398  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11399  ins_encode %{
11400    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11401  %}
11402  ins_pipe( pipe_slow );
11403%}
11404
// MoveD2L from an XMM double register into a long register pair without
// touching memory (UseSSE>=2). An extra XMM temp is needed to extract the
// upper 32 bits.
11405instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11406  predicate(UseSSE>=2);
11407  match(Set dst (MoveD2L src));
11408  effect(DEF dst, USE src, TEMP tmp);
11409  ins_cost(85);
11410  format %{ "MOVD   $dst.lo,$src\n\t"
11411            "PSHUFLW $tmp,$src,0x4E\n\t"
11412            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11413  ins_encode %{
    // Low 32 bits go straight to the low register of the pair
11414    __ movdl($dst$$Register, $src$$XMMRegister);
    // Shuffle src into tmp with immediate 0x4e, then move tmp's low dword
    // into the high register of the pair
11415    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11416    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11417  %}
11418  ins_pipe( pipe_slow );
11419%}
11420
// MoveL2D from a long register pair to a double stack slot: two 32-bit MOV
// stores (opcode 0x89 each), the low half to $dst and the high half to $dst+4
// as shown in the format text.
11421instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11422  match(Set dst (MoveL2D src));
11423  effect(DEF dst, USE src);
11424
11425  ins_cost(200);
11426  format %{ "MOV    $dst,$src.lo\n\t"
11427            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11428  opcode(0x89, 0x89);
11429  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11430  ins_pipe( ialu_mem_long_reg );
11431%}
11432
11433
// MoveL2D from a long stack slot into an x87 double register (UseSSE<=1):
// the slot is loaded as a 64-bit real with FLD_D (DD /0) and popped into dst.
11434instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11435  predicate(UseSSE<=1);
11436  match(Set dst (MoveL2D src));
11437  effect(DEF dst, USE src);
11438  ins_cost(125);
11439
11440  format %{ "FLD_D  $src\n\t"
11441            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11442  opcode(0xDD);               /* DD /0, FLD m64real */
11443  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11444              Pop_Reg_DPR(dst) );
11445  ins_pipe( fpu_reg_mem );
11446%}
11447
11448
// MoveL2D from a long stack slot into an XMM double register, for UseSSE>=2
// with UseXmmLoadAndClearUpper set; the format text shows a MOVSD load.
11449instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11450  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11451  match(Set dst (MoveL2D src));
11452  effect(DEF dst, USE src);
11453
11454  ins_cost(95);
11455  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11456  ins_encode %{
11457    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11458  %}
11459  ins_pipe( pipe_slow );
11460%}
11461
// Counterpart of MoveL2D_stack_reg_sse for UseSSE>=2 with
// UseXmmLoadAndClearUpper cleared; the format text shows MOVLPD instead.
// NOTE(review): the encode body is the same movdbl call as the MOVSD variant —
// presumably movdbl itself picks the instruction from
// UseXmmLoadAndClearUpper; confirm against the MacroAssembler.
11462instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11463  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11464  match(Set dst (MoveL2D src));
11465  effect(DEF dst, USE src);
11466
11467  ins_cost(95);
11468  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11469  ins_encode %{
11470    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11471  %}
11472  ins_pipe( pipe_slow );
11473%}
11474
// MoveL2D from a long register pair into an XMM double register without
// touching memory (UseSSE>=2). dst is declared TEMP (not DEF) in the effect
// list, and a second XMM temp holds the high half before the two are merged.
11475instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11476  predicate(UseSSE>=2);
11477  match(Set dst (MoveL2D src));
11478  effect(TEMP dst, USE src, TEMP tmp);
11479  ins_cost(85);
11480  format %{ "MOVD   $dst,$src.lo\n\t"
11481            "MOVD   $tmp,$src.hi\n\t"
11482            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11483  ins_encode %{
    // Low half into dst, high half into tmp, then interleave the two dwords
11484    __ movdl($dst$$XMMRegister, $src$$Register);
11485    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11486    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11487  %}
11488  ins_pipe( pipe_slow );
11489%}
11490
11491
11492// =======================================================================
11493// fast clearing of an array
// ClearArray rule for nodes whose is_large() is false. cnt and base are
// USE_KILL, the zero register and the flags are KILL. The format template
// lists a short store loop plus a LARGE path that uses REP STOSB or REP STOS
// depending on UseFastStosb; the code itself is emitted by clear_mem(), called
// here with its last argument false.
11494instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11495  predicate(!((ClearArrayNode*)n)->is_large());
11496  match(Set dummy (ClearArray cnt base));
11497  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11498
11499  format %{ $$template
11500    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11501    $$emit$$"CMP    InitArrayShortSize,rcx\n\t"
11502    $$emit$$"JG     LARGE\n\t"
11503    $$emit$$"SHL    ECX, 1\n\t"
11504    $$emit$$"DEC    ECX\n\t"
11505    $$emit$$"JS     DONE\t# Zero length\n\t"
11506    $$emit$$"MOV    EAX,(EDI,ECX,4)\t# LOOP\n\t"
11507    $$emit$$"DEC    ECX\n\t"
11508    $$emit$$"JGE    LOOP\n\t"
11509    $$emit$$"JMP    DONE\n\t"
11510    $$emit$$"# LARGE:\n\t"
11511    if (UseFastStosb) {
11512       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11513       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11514    } else {
11515       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11516       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11517    }
11518    $$emit$$"# DONE"
11519  %}
11520  ins_encode %{
11521    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, false);
11522  %}
11523  ins_pipe( pipe_slow );
11524%}
11525
// ClearArray rule for nodes whose is_large() is true. Same operands and
// effects as rep_stos, but the format template has no short-loop section and
// clear_mem() is called with its last argument true.
11526instruct rep_stos_large(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11527  predicate(((ClearArrayNode*)n)->is_large());
11528  match(Set dummy (ClearArray cnt base));
11529  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11530  format %{ $$template
11531    $$emit$$"XOR    EAX,EAX\t# ClearArray:\n\t"
11532    if (UseFastStosb) {
11533       $$emit$$"SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11534       $$emit$$"REP STOSB\t# store EAX into [EDI++] while ECX--\n\t"
11535    } else {
11536       $$emit$$"SHL    ECX,1\t# Convert doublewords to words\n\t"
11537       $$emit$$"REP STOS\t# store EAX into [EDI++] while ECX--\n\t"
11538    }
11539    $$emit$$"# DONE"
11540  %}
11541  ins_encode %{
11542    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register, true);
11543  %}
11544  ins_pipe( pipe_slow );
11545%}
11546
// StrComp rule selected when the node's encoding is StrIntrinsicNode::LL.
// Both string pointers and both counts are USE_KILL, tmp1 is an XMM temp and
// the flags are killed. The work is done by MacroAssembler::string_compare.
11547instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11548                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11549  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11550  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11551  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11552
11553  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11554  ins_encode %{
11555    __ string_compare($str1$$Register, $str2$$Register,
11556                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11557                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11558  %}
11559  ins_pipe( pipe_slow );
11560%}
11561
// StrComp rule selected when the node's encoding is StrIntrinsicNode::UU.
// Same operand layout and effects as string_compareL; only the encoding
// constant passed to string_compare differs.
11562instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11563                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11564  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11565  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11566  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11567
11568  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11569  ins_encode %{
11570    __ string_compare($str1$$Register, $str2$$Register,
11571                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11572                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11573  %}
11574  ins_pipe( pipe_slow );
11575%}
11576
// StrComp rule selected when the node's encoding is StrIntrinsicNode::LU.
// Same operand layout and effects as string_compareL; only the encoding
// constant passed to string_compare differs.
11577instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11578                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11579  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11580  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11581  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11582
11583  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11584  ins_encode %{
11585    __ string_compare($str1$$Register, $str2$$Register,
11586                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11587                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11588  %}
11589  ins_pipe( pipe_slow );
11590%}
11591
// StrComp rule selected when the node's encoding is StrIntrinsicNode::UL.
// Unlike the LL/UU/LU rules, str1/cnt1 use the eSIRegP/eDXRegI operand classes
// and str2/cnt2 use eDIRegP/eCXRegI, and the arguments are handed to
// string_compare in swapped order (str2, str1, cnt2, cnt1).
11592instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11593                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11594  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11595  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11596  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11597
11598  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11599  ins_encode %{
11600    __ string_compare($str2$$Register, $str1$$Register,
11601                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
11602                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11603  %}
11604  ins_pipe( pipe_slow );
11605%}
11606
11607// fast string equals
// StrEquals rule (no predicate). str1, str2 and cnt are USE_KILL; tmp1/tmp2
// are XMM temps; tmp3 and the flags are killed. Implemented with
// arrays_equals, passing false as its first argument (the AryEq rules in this
// file pass true) and cnt as the length register.
11608instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11609                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11610  match(Set result (StrEquals (Binary str1 str2) cnt));
11611  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11612
11613  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11614  ins_encode %{
11615    __ arrays_equals(false, $str1$$Register, $str2$$Register,
11616                     $cnt$$Register, $result$$Register, $tmp3$$Register,
11617                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11618  %}
11619
11620  ins_pipe( pipe_slow );
11621%}
11622
11623// fast search of substring with known size.
// StrIndexOf rule for the LL encoding where the substring length is an
// immediate (immI int_cnt2). Requires UseSSE42Intrinsics. cnt2 and tmp are
// not inputs of the match rule; they are scratch registers killed here.
11624instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11625                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11626  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11627  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11628  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11629
11630  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11631  ins_encode %{
    // The constant length picks the helper when the code is emitted
11632    int icnt2 = (int)$int_cnt2$$constant;
11633    if (icnt2 >= 16) {
11634      // IndexOf for constant substrings with size >= 16 elements
11635      // which don't need to be loaded through stack.
11636      __ string_indexofC8($str1$$Register, $str2$$Register,
11637                          $cnt1$$Register, $cnt2$$Register,
11638                          icnt2, $result$$Register,
11639                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11640    } else {
11641      // Small strings are loaded through stack if they cross page boundary.
11642      __ string_indexof($str1$$Register, $str2$$Register,
11643                        $cnt1$$Register, $cnt2$$Register,
11644                        icnt2, $result$$Register,
11645                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11646    }
11647  %}
11648  ins_pipe( pipe_slow );
11649%}
11650
11651// fast search of substring with known size.
// StrIndexOf rule for the UU encoding with an immediate substring length.
// Same structure as string_indexof_conL, but the threshold for choosing
// string_indexofC8 is 8 elements instead of 16.
11652instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11653                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11654  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11655  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11656  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11657
11658  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11659  ins_encode %{
    // The constant length picks the helper when the code is emitted
11660    int icnt2 = (int)$int_cnt2$$constant;
11661    if (icnt2 >= 8) {
11662      // IndexOf for constant substrings with size >= 8 elements
11663      // which don't need to be loaded through stack.
11664      __ string_indexofC8($str1$$Register, $str2$$Register,
11665                          $cnt1$$Register, $cnt2$$Register,
11666                          icnt2, $result$$Register,
11667                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11668    } else {
11669      // Small strings are loaded through stack if they cross page boundary.
11670      __ string_indexof($str1$$Register, $str2$$Register,
11671                        $cnt1$$Register, $cnt2$$Register,
11672                        icnt2, $result$$Register,
11673                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11674    }
11675  %}
11676  ins_pipe( pipe_slow );
11677%}
11678
11679// fast search of substring with known size.
// StrIndexOf rule for the UL encoding with an immediate substring length.
// Same structure and 8-element threshold as string_indexof_conU; only the
// encoding constant passed to the helpers differs.
11680instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11681                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11682  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11683  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11684  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11685
11686  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11687  ins_encode %{
    // The constant length picks the helper when the code is emitted
11688    int icnt2 = (int)$int_cnt2$$constant;
11689    if (icnt2 >= 8) {
11690      // IndexOf for constant substrings with size >= 8 elements
11691      // which don't need to be loaded through stack.
11692      __ string_indexofC8($str1$$Register, $str2$$Register,
11693                          $cnt1$$Register, $cnt2$$Register,
11694                          icnt2, $result$$Register,
11695                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11696    } else {
11697      // Small strings are loaded through stack if they cross page boundary.
11698      __ string_indexof($str1$$Register, $str2$$Register,
11699                        $cnt1$$Register, $cnt2$$Register,
11700                        icnt2, $result$$Register,
11701                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11702    }
11703  %}
11704  ins_pipe( pipe_slow );
11705%}
11706
// StrIndexOf rule for the LL encoding when the substring length is a register
// operand (cnt2) rather than an immediate. Requires UseSSE42Intrinsics.
// string_indexof is called with -1 in the constant-count argument position.
11707instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11708                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11709  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11710  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11711  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11712
11713  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11714  ins_encode %{
11715    __ string_indexof($str1$$Register, $str2$$Register,
11716                      $cnt1$$Register, $cnt2$$Register,
11717                      (-1), $result$$Register,
11718                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11719  %}
11720  ins_pipe( pipe_slow );
11721%}
11722
// StrIndexOf rule for the UU encoding with a register substring length.
// Same operands and effects as string_indexofL; only the encoding constant
// passed to string_indexof differs.
11723instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11724                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11725  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11726  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11727  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11728
11729  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11730  ins_encode %{
11731    __ string_indexof($str1$$Register, $str2$$Register,
11732                      $cnt1$$Register, $cnt2$$Register,
11733                      (-1), $result$$Register,
11734                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11735  %}
11736  ins_pipe( pipe_slow );
11737%}
11738
// StrIndexOf rule for the UL encoding with a register substring length.
// Same operands and effects as string_indexofL; only the encoding constant
// passed to string_indexof differs.
11739instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11740                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11741  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11742  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11743  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11744
11745  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11746  ins_encode %{
11747    __ string_indexof($str1$$Register, $str2$$Register,
11748                      $cnt1$$Register, $cnt2$$Register,
11749                      (-1), $result$$Register,
11750                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11751  %}
11752  ins_pipe( pipe_slow );
11753%}
11754
// StrIndexOfChar rule (requires UseSSE42Intrinsics): searches str1/cnt1 for
// the single value in ch via MacroAssembler::string_indexof_char. Uses three
// XMM temps; str1, cnt1 and ch are USE_KILL, tmp is a TEMP, flags are killed.
11755instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11756                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11757  predicate(UseSSE42Intrinsics);
11758  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11759  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11760  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11761  ins_encode %{
11762    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11763                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11764  %}
11765  ins_pipe( pipe_slow );
11766%}
11767
11768// fast array equals
// AryEq rule selected when the node's encoding is StrIntrinsicNode::LL.
// ary1/ary2 are USE_KILL, tmp1/tmp2 are XMM temps, tmp3/tmp4 and the flags are
// killed. Calls arrays_equals with first argument true, tmp3 in the position
// where string_equals passes its count register, and the final "char" flag
// false.
11769instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11770                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11771%{
11772  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11773  match(Set result (AryEq ary1 ary2));
11774  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11775  //ins_cost(300);
11776
11777  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11778  ins_encode %{
11779    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11780                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11781                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11782  %}
11783  ins_pipe( pipe_slow );
11784%}
11785
// Fast array equality, char[] flavour: matches AryEq nodes whose encoding()
// is StrIntrinsicNode::UU and calls arrays_equals() with the trailing "char"
// flag set to true. Register usage is identical to array_equalsB.
instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr) %{
  match(Set result (AryEq ary1 ary2));
  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
  //ins_cost(300);
  ins_pipe( pipe_slow );

  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
  ins_encode %{
    const bool elements_are_chars = true;
    __ arrays_equals(true,
                     $ary1$$Register, $ary2$$Register,
                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
                     $tmp1$$XMMRegister, $tmp2$$XMMRegister,
                     elements_are_chars);
  %}
%}
11802
// Matches HasNegatives over a byte[] (ary1, len) and delegates to
// MacroAssembler::has_negatives(). ary1 and len are USE_KILLed, tmp3 and the
// flags are KILLed, and two XMM registers are TEMPs. No predicate: the rule
// applies to every HasNegatives node.
instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
  match(Set result (HasNegatives ary1 len));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
  ins_pipe( pipe_slow );

  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
  ins_encode %{
    __ has_negatives($ary1$$Register,
                     $len$$Register,
                     $result$$Register,
                     $tmp3$$Register,
                     $tmp1$$XMMRegister,
                     $tmp2$$XMMRegister);
  %}
%}
11817
// fast char[] to byte[] compression
//
// Matches StrCompressedCopy and delegates to MacroAssembler::char_array_compress().
// src, dst and len are USE_KILLed, tmp5 and the flags are KILLed, and four XMM
// registers are TEMPs; the result is defined in the eAXRegI operand.
//
// The format text lists the registers named by the effect() clause, using the
// same wording as encode_iso_array (which has the same operand classes). It
// previously named the 64-bit registers RAX/RCX/RDX and listed the result
// register as killed.
instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
  match(Set result (StrCompressedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);

  format %{ "String Compress $src,$dst -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI" %}
  ins_encode %{
    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
  %}
  ins_pipe( pipe_slow );
%}
11832
// Fast byte[] to char[] inflation: matches StrInflatedCopy and delegates to
// MacroAssembler::byte_array_inflate(). The node's value is bound to a
// Universe operand named dummy. src, dst and len are USE_KILLed, the XMM and
// eCXRegI operands are TEMPs, and the flags are KILLed.
instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
                        regD tmp1, eCXRegI tmp2, eFlagsReg cr)
%{
  match(Set dummy (StrInflatedCopy src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
  ins_pipe( pipe_slow );

  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
  ins_encode %{
    __ byte_array_inflate($src$$Register,
                          $dst$$Register,
                          $len$$Register,
                          $tmp1$$XMMRegister,
                          $tmp2$$Register);
  %}
%}
11846
// ISO_8859_1 encoding of a char[] into a byte[]: matches EncodeISOArray and
// delegates to MacroAssembler::encode_iso_array(). src, dst and len are
// USE_KILLed, tmp5 and the flags are KILLed, four XMM registers are TEMPs,
// and the result is defined in the eAXRegI operand.
instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr)
%{
  match(Set result (EncodeISOArray src (Binary dst len)));
  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
  ins_pipe( pipe_slow );

  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
  ins_encode %{
    __ encode_iso_array($src$$Register,
                        $dst$$Register,
                        $len$$Register,
                        $tmp1$$XMMRegister,
                        $tmp2$$XMMRegister,
                        $tmp3$$XMMRegister,
                        $tmp4$$XMMRegister,
                        $tmp5$$Register,
                        $result$$Register);
  %}
%}
11862
11863
11864//----------Control Flow Instructions------------------------------------------
11865// Signed compare Instructions
// Signed compare of two integer registers: matches (CmpI op1 op2) and defines
// the signed flags register cr. Encoded as CMP via opcode 3B /r with a
// register-register ModRM built by RegReg.
11866instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11867  match(Set cr (CmpI op1 op2));
11868  effect( DEF cr, USE op1, USE op2 );
11869  format %{ "CMP    $op1,$op2" %}
11870  opcode(0x3B);  /* Opcode 3B /r */
11871  ins_encode( OpcP, RegReg( op1, op2) );
11872  ins_pipe( ialu_cr_reg_reg );
11873%}
11874
// Signed compare of an integer register against an immediate: matches
// (CmpI op1 op2) with an immI second operand. Built from opcode 81 /7 through
// the OpcSErm and Con8or32 encoding classes.
// NOTE(review): OpcSErm/Con8or32 presumably select the sign-extended 8-bit
// immediate form when the constant fits -- confirm in the encode section.
11875instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11876  match(Set cr (CmpI op1 op2));
11877  effect( DEF cr, USE op1 );
11878  format %{ "CMP    $op1,$op2" %}
11879  opcode(0x81,0x07);  /* Opcode 81 /7 */
11880  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11881  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11882  ins_pipe( ialu_cr_reg_imm );
11883%}
11884
11885// Cisc-spilled version of compI_eReg: the second operand is read directly
// from memory, i.e. the rule matches (CmpI op1 (LoadI op2)) and encodes it as
// CMP with a register-memory ModRM (opcode 3B /r, RegMem).
// ins_cost(500) is declared explicitly; compI_eReg declares no cost.
11886instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11887  match(Set cr (CmpI op1 (LoadI op2)));
11888
11889  format %{ "CMP    $op1,$op2" %}
11890  ins_cost(500);
11891  opcode(0x3B);  /* Opcode 3B /r */
11892  ins_encode( OpcP, RegMem( op1, op2) );
11893  ins_pipe( ialu_cr_reg_mem );
11894%}
11895
// Signed compare of an integer register against the constant zero: matches
// (CmpI src zero) and encodes it as TEST src,src (opcode 0x85) instead of a
// CMP with an immediate.
11896instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11897  match(Set cr (CmpI src zero));
11898  effect( DEF cr, USE src );
11899
11900  format %{ "TEST   $src,$src" %}
11901  opcode(0x85);
11902  ins_encode( OpcP, RegReg( src, src ) );
11903  ins_pipe( ialu_cr_reg_imm );
11904%}
11905
// Bit test against a constant mask: matches (CmpI (AndI src con) zero) and
// folds the AND into a single TEST src,con. Encoded from opcode F7 with
// sub-opcode 0 (RegOpc) followed by a 32-bit immediate (Con32).
11906instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11907  match(Set cr (CmpI (AndI src con) zero));
11908
11909  format %{ "TEST   $src,$con" %}
11910  opcode(0xF7,0x00);
11911  ins_encode( OpcP, RegOpc(src), Con32(con) );
11912  ins_pipe( ialu_cr_reg_imm );
11913%}
11914
// Bit test of a register against a value in memory: folds the load and the
// AND into a single TEST src,[mem] (opcode 0x85 with a register-memory ModRM),
// so the flags are set from (src & [mem]) without a temporary register.
//
// The memory operand is wrapped in LoadI, as in the other memory-operand
// compare rules here (compI_eReg_mem, compU_eReg_mem, compP_eReg_mem).
// AndI takes an int value as input, whereas a bare 'memory' operand only
// matches an address expression, so the rule could never be selected in its
// original (AndI src mem) form.
instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src (LoadI mem)) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
11923
11924// Unsigned compare Instructions; really, same as signed except they
11925// produce an eFlagsRegU instead of eFlagsReg.
//
// Register-register form: matches (CmpU op1 op2) and uses the same CMP
// encoding (opcode 3B /r, RegReg) as compI_eReg.
11926instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11927  match(Set cr (CmpU op1 op2));
11928
11929  format %{ "CMPu   $op1,$op2" %}
11930  opcode(0x3B);  /* Opcode 3B /r */
11931  ins_encode( OpcP, RegReg( op1, op2) );
11932  ins_pipe( ialu_cr_reg_reg );
11933%}
11934
// Unsigned compare of an integer register against an immediate: matches
// (CmpU op1 op2) with an immI operand and defines eFlagsRegU. Same encoding
// classes as compI_eReg_imm (opcode 81 /7 via OpcSErm + Con8or32).
11935instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11936  match(Set cr (CmpU op1 op2));
11937
11938  format %{ "CMPu   $op1,$op2" %}
11939  opcode(0x81,0x07);  /* Opcode 81 /7 */
11940  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11941  ins_pipe( ialu_cr_reg_imm );
11942%}
11943
11944// Cisc-spilled version of compU_eReg: matches (CmpU op1 (LoadI op2)) and
// compares the register directly against memory (opcode 3B /r, RegMem).
// ins_cost(500) is declared explicitly; compU_eReg declares no cost.
11945instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11946  match(Set cr (CmpU op1 (LoadI op2)));
11947
11948  format %{ "CMPu   $op1,$op2" %}
11949  ins_cost(500);
11950  opcode(0x3B);  /* Opcode 3B /r */
11951  ins_encode( OpcP, RegMem( op1, op2) );
11952  ins_pipe( ialu_cr_reg_mem );
11953%}
11954
11955// // Cisc-spilled version of cmpU_eReg
11956//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11957//  match(Set cr (CmpU (LoadI op1) op2));
11958//
11959//  format %{ "CMPu   $op1,$op2" %}
11960//  ins_cost(500);
11961//  opcode(0x39);  /* Opcode 39 /r */
11962//  ins_encode( OpcP, RegMem( op1, op2) );
11963//%}
11964
// Unsigned compare of an integer register against zero: matches
// (CmpU src zero) and encodes it as TEST src,src (opcode 0x85), defining
// eFlagsRegU.
11965instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11966  match(Set cr (CmpU src zero));
11967
11968  format %{ "TESTu  $src,$src" %}
11969  opcode(0x85);
11970  ins_encode( OpcP, RegReg( src, src ) );
11971  ins_pipe( ialu_cr_reg_imm );
11972%}
11973
11974// Unsigned pointer compare Instructions
//
// Register-register form: matches (CmpP op1 op2) on two eRegP operands and
// defines eFlagsRegU using CMP (opcode 3B /r, RegReg).
11975instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11976  match(Set cr (CmpP op1 op2));
11977
11978  format %{ "CMPu   $op1,$op2" %}
11979  opcode(0x3B);  /* Opcode 3B /r */
11980  ins_encode( OpcP, RegReg( op1, op2) );
11981  ins_pipe( ialu_cr_reg_reg );
11982%}
11983
// Pointer compare against an immediate pointer constant: matches
// (CmpP op1 op2) with an immP operand and defines eFlagsRegU. Uses the same
// encoding classes as the integer immediate compares (opcode 81 /7 via
// OpcSErm + Con8or32).
11984instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11985  match(Set cr (CmpP op1 op2));
11986
11987  format %{ "CMPu   $op1,$op2" %}
11988  opcode(0x81,0x07);  /* Opcode 81 /7 */
11989  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11990  ins_pipe( ialu_cr_reg_imm );
11991%}
11992
11993// Cisc-spilled version of compP_eReg: matches (CmpP op1 (LoadP op2)) and
// compares the pointer register directly against memory (opcode 3B /r,
// RegMem). ins_cost(500) is declared explicitly; compP_eReg declares no cost.
11994instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11995  match(Set cr (CmpP op1 (LoadP op2)));
11996
11997  format %{ "CMPu   $op1,$op2" %}
11998  ins_cost(500);
11999  opcode(0x3B);  /* Opcode 3B /r */
12000  ins_encode( OpcP, RegMem( op1, op2) );
12001  ins_pipe( ialu_cr_reg_mem );
12002%}
12003
12004// // Cisc-spilled version of cmpP_eReg
12005//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12006//  match(Set cr (CmpP (LoadP op1) op2));
12007//
12008//  format %{ "CMPu   $op1,$op2" %}
12009//  ins_cost(500);
12010//  opcode(0x39);  /* Opcode 39 /r */
12011//  ins_encode( OpcP, RegMem( op1, op2) );
12012//%}
12013
12014// Compare raw pointer (used in out-of-heap check).
12015// Only works because non-oop pointers must be raw pointers
12016// and raw pointers have no anti-dependencies.
//
// Same match rule and encoding as compP_eReg_mem, but without the explicit
// ins_cost(500) and restricted by the predicate below: n->in(2) is the second
// input of the CmpP (the LoadP), n->in(2)->in(2) is that node's own second
// input, and the rule applies only when its bottom type carries no relocation
// (relocInfo::none).
// NOTE(review): in(2) of the LoadP is presumably its address input -- confirm
// against the node input layout.
12017instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12018  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
12019  match(Set cr (CmpP op1 (LoadP op2)));
12020
12021  format %{ "CMPu   $op1,$op2" %}
12022  opcode(0x3B);  /* Opcode 3B /r */
12023  ins_encode( OpcP, RegMem( op1, op2) );
12024  ins_pipe( ialu_cr_reg_mem );
12025%}
12026
12027//
12028// This will generate a signed flags result. This should be ok
12029// since any compare to a zero should be eq/neq.
//
// Pointer null test: matches (CmpP src zero) with an immP0 operand and
// encodes it as TEST src,src (opcode 0x85). Unlike the other CmpP rules the
// result is an eFlagsReg (signed flags), as the note above explains.
12030instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12031  match(Set cr (CmpP src zero));
12032
12033  format %{ "TEST   $src,$src" %}
12034  opcode(0x85);
12035  ins_encode( OpcP, RegReg( src, src ) );
12036  ins_pipe( ialu_cr_reg_imm );
12037%}
12038
12039// Cisc-spilled version of testP_reg
12040// This will generate a signed flags result. This should be ok
12041// since any compare to a zero should be eq/neq.
//
// Matches (CmpP (LoadP op) zero) and tests the memory word against an all-ones
// mask: opcode F7 with sub-opcode 0 (RMopc_Mem) followed by the 32-bit
// constant 0xFFFFFFFF, as the format string shows.
// NOTE(review): 'zero' is declared immI0 here, whereas testP_reg uses immP0
// for the same CmpP-against-zero shape. An integer-zero operand under CmpP
// looks like it can never match, which would leave this rule dormant --
// confirm before relying on it as the cisc-spill alternative of testP_reg.
12042instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12043  match(Set cr (CmpP (LoadP op) zero));
12044
12045  format %{ "TEST   $op,0xFFFFFFFF" %}
12046  ins_cost(500);
12047  opcode(0xF7);               /* Opcode F7 /0 */
12048  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12049  ins_pipe( ialu_cr_reg_imm );
12050%}
12051
12052// Yanked all unsigned pointer compare operations.
12053// Pointer compares are done with CmpP which is already unsigned.
12054
12055//----------Max and Min--------------------------------------------------------
12056// Min Instructions
12057////
12058//   *** Min and Max using the conditional move are slower than the
12059//   *** branch version on a Pentium III.
12060// // Conditional move for min
12061//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12062//  effect( USE_DEF op2, USE op1, USE cr );
12063//  format %{ "CMOVlt $op2,$op1\t! min" %}
12064//  opcode(0x4C,0x0F);
12065//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12066//  ins_pipe( pipe_cmov_reg );
12067//%}
12068//
12069//// Min Register with Register (P6 version)
12070//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
12071//  predicate(VM_Version::supports_cmov() );
12072//  match(Set op2 (MinI op1 op2));
12073//  ins_cost(200);
12074//  expand %{
12075//    eFlagsReg cr;
12076//    compI_eReg(cr,op1,op2);
12077//    cmovI_reg_lt(op2,op1,cr);
12078//  %}
12079//%}
12080
12081// Min Register with Register (generic version)
//
// Matches (MinI dst src) with the result written back into dst. The flags
// register is KILLed, and the encoding is produced by the min_enc class.
// NOTE(review): opcode(0xCC) is declared but min_enc takes only dst and src;
// presumably the opcode is a placeholder -- confirm min_enc does not read it.
12082instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12083  match(Set dst (MinI dst src));
12084  effect(KILL flags);
12085  ins_cost(300);
12086
12087  format %{ "MIN    $dst,$src" %}
12088  opcode(0xCC);
12089  ins_encode( min_enc(dst,src) );
12090  ins_pipe( pipe_slow );
12091%}
12092
12093// Max Register with Register
12094//   *** Min and Max using the conditional move are slower than the
12095//   *** branch version on a Pentium III.
12096// // Conditional move for max
12097//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
12098//  effect( USE_DEF op2, USE op1, USE cr );
12099//  format %{ "CMOVgt $op2,$op1\t! max" %}
12100//  opcode(0x4F,0x0F);
12101//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12102//  ins_pipe( pipe_cmov_reg );
12103//%}
12104//
12105// // Max Register with Register (P6 version)
12106//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12107//  predicate(VM_Version::supports_cmov() );
12108//  match(Set op2 (MaxI op1 op2));
12109//  ins_cost(200);
12110//  expand %{
12111//    eFlagsReg cr;
12112//    compI_eReg(cr,op1,op2);
12113//    cmovI_reg_gt(op2,op1,cr);
12114//  %}
12115//%}
12116
12117// Max Register with Register (generic version)
//
// Matches (MaxI dst src) with the result written back into dst. The flags
// register is KILLed, and the encoding is produced by the max_enc class.
// NOTE(review): opcode(0xCC) is declared but max_enc takes only dst and src;
// presumably the opcode is a placeholder -- confirm max_enc does not read it.
12118instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12119  match(Set dst (MaxI dst src));
12120  effect(KILL flags);
12121  ins_cost(300);
12122
12123  format %{ "MAX    $dst,$src" %}
12124  opcode(0xCC);
12125  ins_encode( max_enc(dst,src) );
12126  ins_pipe( pipe_slow );
12127%}
12128
12129// ============================================================================
12130// Counted Loop limit node which represents exact final iterator value.
12131// Note: the resulting value should fit into integer range since
12132// counted loops have limit check on overflow.
// Computes the exact final iterator value of a counted loop, as spelled out
// in the format string:
//   limit = init + stride * ((limit - init + stride - 1) / stride)
// The stride is a compile-time constant and must not be +1 or -1 (asserted).
// The intermediate difference is held as a 64-bit value in the limit_hi:limit
// register pair (EAX:EDX per the comments below); limit_hi and tmp are TEMPs
// and the flags are KILLed.
12133instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12134  match(Set limit (LoopLimit (Binary init limit) stride));
12135  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12136  ins_cost(300);
12137
12138  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12139  ins_encode %{
12140    int strd = (int)$stride$$constant;
12141    assert(strd != 1 && strd != -1, "sanity");
12142    int m1 = (strd > 0) ? 1 : -1;
12143    // Convert limit to long (EAX:EDX)
12144    __ cdql();
12145    // Convert init to long (init:tmp)
12146    __ movl($tmp$$Register, $init$$Register);
12147    __ sarl($tmp$$Register, 31);
12148    // $limit - $init
12149    __ subl($limit$$Register, $init$$Register);
12150    __ sbbl($limit_hi$$Register, $tmp$$Register);
12151    // + ($stride - 1)
12152    if (strd > 0) {
12153      __ addl($limit$$Register, (strd - 1));
12154      __ adcl($limit_hi$$Register, 0);
12155      __ movl($tmp$$Register, strd);
12156    } else {
12157      __ addl($limit$$Register, (strd + 1));
12158      __ adcl($limit_hi$$Register, -1);
12159      __ lneg($limit_hi$$Register, $limit$$Register);
12160      __ movl($tmp$$Register, -strd);
12161    }
12162    // signed division: (EAX:EDX) / pos_stride
12163    __ idivl($tmp$$Register);
12164    if (strd < 0) {
12165      // restore sign
12166      __ negl($tmp$$Register);
12167    }
12168    // (EAX) * stride
12169    __ mull($tmp$$Register);
12170    // + init (ignore upper bits)
12171    __ addl($limit$$Register, $init$$Register);
12172  %}
12173  ins_pipe( pipe_slow );
12174%}
12175
12176// ============================================================================
12177// Branch Instructions
// Jump Table
// Indirect jump for a Jump node: the target is read from the constant table
// entry at (table base + switch_val), with switch_val used as an unscaled
// (times_1) index.
instruct jumpXtnd(rRegI switch_val)
%{
  match(Jump switch_val);
  ins_cost(350);
  ins_pipe(pipe_jmp);

  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
  ins_encode %{
    // Index part of the table address: no base register, unscaled switch value.
    Address table_index(noreg, $switch_val$$Register, Address::times_1);
    __ jump(ArrayAddress($constantaddress, table_index));
  %}
%}
12190
// Jump Direct - Label defines a relative address from JMP+1
// Unconditional branch for Goto. The jump is always emitted in its long form,
// which the fixed size(5) declaration relies on.
instruct jmpDir(label labl)
%{
  match(Goto);
  effect(USE labl);

  size(5);
  ins_cost(300);
  ins_pipe( pipe_jmp );

  format %{ "JMP    $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    __ jmp(target, false);  // false: always long jump
  %}
%}
12205
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Conditional branch for an If on signed flags (cmpOp / eFlagsReg). Always
// emitted in the long form, matching the fixed size(6) declaration.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  size(6);
  ins_cost(300);
  ins_pipe( pipe_jcc );

  format %{ "J$cop    $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12220
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (CountedLoopEnd) on signed flags. Applies only
// when the node has no vector mask set; the _and_restoreMask variant handles
// the other case. Always long form, fixed size(6).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  size(6);
  ins_cost(300);
  ins_pipe( pipe_jcc );

  format %{ "J$cop    $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12236
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (CountedLoopEnd) on unsigned flags
// (cmpOpU / eFlagsRegU), for nodes without a vector mask set. Always long
// form, fixed size(6).
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  size(6);
  ins_cost(300);
  ins_pipe( pipe_jcc );

  format %{ "J$cop,u  $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12252
// Back-branch of a counted loop (CountedLoopEnd) on the cmpOpUCF /
// eFlagsRegUCF flavour of flags, for nodes without a vector mask set.
// Declared cheaper (ins_cost 200) than jmpLoopEndU. Always long form,
// fixed size(6).
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(!n->has_vector_mask_set());
  effect(USE labl);

  size(6);
  ins_cost(200);
  ins_pipe( pipe_jcc );

  format %{ "J$cop,u  $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12267
// mask version
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch on signed flags for nodes that have a vector mask
// set: the long conditional jump is followed by restorevectmask(), giving the
// declared size(10).
instruct jmpLoopEnd_and_restoreMask(cmpOp cop, eFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  size(10);
  ins_cost(400);
  ins_pipe( pipe_jcc );

  format %{ "J$cop    $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
    __ restorevectmask();
  %}
%}
12286
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Counted-loop back-branch on unsigned flags for nodes that have a vector mask
// set: the long conditional jump is followed by restorevectmask(), giving the
// declared size(10).
instruct jmpLoopEndU_and_restoreMask(cmpOpU cop, eFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  size(10);
  ins_cost(400);
  ins_pipe( pipe_jcc );

  format %{ "J$cop,u  $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
    __ restorevectmask();
  %}
%}
12304
// Counted-loop back-branch on cmpOpUCF / eFlagsRegUCF flags for nodes that
// have a vector mask set: the long conditional jump is followed by
// restorevectmask(), giving the declared size(10). Declared cheaper
// (ins_cost 300) than the cmpOp and cmpOpU mask variants.
instruct jmpLoopEndUCF_and_restoreMask(cmpOpUCF cop, eFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  predicate(n->has_vector_mask_set());
  effect(USE labl);

  size(10);
  ins_cost(300);
  ins_pipe( pipe_jcc );

  format %{ "J$cop,u  $labl\t# Loop end\n\t"
            "restorevectmask \t# vector mask restore for loops" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
    __ restorevectmask();
  %}
%}
12321
// Jump Direct Conditional - using unsigned comparison
// Conditional branch for an If on unsigned flags (cmpOpU / eFlagsRegU).
// Always emitted in the long form, matching the fixed size(6) declaration.
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(300);
  ins_pipe(pipe_jcc);

  format %{ "J$cop,u  $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12336
// Conditional branch for an If on cmpOpUCF / eFlagsRegUCF flags. Declared
// cheaper (ins_cost 200) than jmpConU. Always emitted in the long form,
// matching the fixed size(6) declaration.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(6);
  ins_cost(200);
  ins_pipe(pipe_jcc);

  format %{ "J$cop,u  $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jcc(cc, target, false);  // false: always long jump
  %}
%}
12350
// Conditional branch for an If on cmpOpUCF2 / eFlagsRegUCF flags, where the
// parity condition has to be consulted in addition to equal/notEqual.
// Two jumps are emitted:
//   notEqual: jump to labl if parity, then jump to labl if notEqual.
//   equal:    skip over the branch if parity, otherwise jump to labl if equal.
// Any other condition code reaches ShouldNotReachHere().
// NOTE(review): parity presumably flags an unordered floating-point compare --
// confirm against the rules that produce eFlagsRegUCF.
// No size() is declared here, unlike the single-jump rules.
12351instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12352  match(If cop cmp);
12353  effect(USE labl);
12354
12355  ins_cost(200);
12356  format %{ $$template
12357    if ($cop$$cmpcode == Assembler::notEqual) {
12358      $$emit$$"JP,u   $labl\n\t"
12359      $$emit$$"J$cop,u   $labl"
12360    } else {
12361      $$emit$$"JP,u   done\n\t"
12362      $$emit$$"J$cop,u   $labl\n\t"
12363      $$emit$$"done:"
12364    }
12365  %}
12366  ins_encode %{
12367    Label* l = $labl$$label;
12368    if ($cop$$cmpcode == Assembler::notEqual) {
12369      __ jcc(Assembler::parity, *l, false);
12370      __ jcc(Assembler::notEqual, *l, false);
12371    } else if ($cop$$cmpcode == Assembler::equal) {
12372      Label done;
12373      __ jccb(Assembler::parity, done);
12374      __ jcc(Assembler::equal, *l, false);
12375      __ bind(done);
12376    } else {
12377       ShouldNotReachHere();
12378    }
12379  %}
12380  ins_pipe(pipe_jcc);
12381%}
12382
// ============================================================================
// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
// array for an instance of the superklass.  Set a hidden internal cache on a
// hit (cache is checked with exposed code in gen_subtype_check()).  Return
// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
//
// This variant produces the result as a value in the eDIRegP operand:
// opcode(0x1) asks enc_PartialSubtypeCheck to XOR the result register on a
// hit. The eCXRegI operand and the flags are KILLed.
//
// The XOR line of the format text carries the '#' comment marker used by
// every other line; it was missing in the original string.
instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
  match(Set result (PartialSubtypeCheck sub super));
  effect( KILL rcx, KILL cr );

  ins_cost(1100);  // slightly larger than the next version
  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
            "XOR    $result,$result\t\t# Hit: EDI zero\n\t"
     "miss:\t" %}

  opcode(0x1); // Force a XOR of EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12406
// Flags-only form of partialSubtypeCheck: matches the check when its result
// is immediately compared against null, (CmpP (PartialSubtypeCheck sub super)
// zero), and defines the flags register instead of a value. It uses the same
// enc_PartialSubtypeCheck encoding but with opcode(0x0), so the result
// register is not XORed; that register and the eCXRegI operand are KILLed.
// ins_cost(1000) is lower than the 1100 of the value-producing variant.
12407instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12408  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12409  effect( KILL rcx, KILL result );
12410
12411  ins_cost(1000);
12412  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12413            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12414            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12415            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12416            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12417            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12418     "miss:\t" %}
12419
12420  opcode(0x0);  // No need to XOR EDI
12421  ins_encode( enc_PartialSubtypeCheck() );
12422  ins_pipe( pipe_slow );
12423%}
12424
12425// ============================================================================
12426// Branch Instructions -- short offset versions
12427//
12428// These instructions are used to replace jumps of a long offset (the default
12429// match) with jumps of a shorter offset.  These instructions are all tagged
12430// with the ins_short_branch attribute, which causes the ADLC to suppress the
12431// match rules in general matching.  Instead, the ADLC generates a conversion
12432// method in the MachNode which can be used to do in-place replacement of the
12433// long variant with the shorter variant.  The compiler decides whether a
12434// branch can use the short form by calling the is_short_branch_offset()
12435// predicate in the machine-specific code section of the file.
12436
// Jump Direct - Label defines a relative address from JMP+1
// Short-offset replacement for the unconditional Goto branch: emitted with
// jmpb and a fixed size(2). Tagged ins_short_branch, so it is not used in
// general matching.
instruct jmpDir_short(label labl)
%{
  match(Goto);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jmp );
  ins_short_branch(1);

  format %{ "JMP,s  $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    __ jmpb(target);
  %}
%}
12452
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for the If branch on signed flags: emitted with
// jccb and a fixed size(2). Tagged ins_short_branch.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl)
%{
  match(If cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,s  $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12468
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for the counted-loop back-branch on signed flags:
// emitted with jccb and a fixed size(2). Tagged ins_short_branch. Unlike the
// long jmpLoopEnd rule it declares no vector-mask predicate.
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl)
%{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,s  $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12484
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short-offset replacement for the counted-loop back-branch on unsigned flags
// (cmpOpU / eFlagsRegU): emitted with jccb and a fixed size(2). Tagged
// ins_short_branch.
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,us $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12500
// Short-offset replacement for the counted-loop back-branch on cmpOpUCF /
// eFlagsRegUCF flags: emitted with jccb and a fixed size(2). Tagged
// ins_short_branch.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl)
%{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,us $labl\t# Loop end" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12515
// Jump Direct Conditional - using unsigned comparison
// Short-offset replacement for the If branch on unsigned flags
// (cmpOpU / eFlagsRegU): emitted with jccb and a fixed size(2). Tagged
// ins_short_branch.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12531
// Short-offset replacement for the If branch on cmpOpUCF / eFlagsRegUCF
// flags: emitted with jccb and a fixed size(2). Tagged ins_short_branch.
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl)
%{
  match(If cop cmp);
  effect(USE labl);

  size(2);
  ins_cost(300);
  ins_pipe( pipe_jcc );
  ins_short_branch(1);

  format %{ "J$cop,us $labl" %}
  ins_encode %{
    Label& target = *($labl$$label);
    const Assembler::Condition cc = (Assembler::Condition)($cop$$cmpcode);
    __ jccb(cc, target);
  %}
%}
12546
// Short-offset replacement for jmpConUCF2: the same two-jump sequence, but
// every jump uses jccb, which is what the fixed size(4) declaration reflects.
//   notEqual: jump to labl if parity, then jump to labl if notEqual.
//   equal:    skip over the branch if parity, otherwise jump to labl if equal.
// Any other condition code reaches ShouldNotReachHere().
// Tagged ins_short_branch.
12547instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12548  match(If cop cmp);
12549  effect(USE labl);
12550
12551  ins_cost(300);
12552  format %{ $$template
12553    if ($cop$$cmpcode == Assembler::notEqual) {
12554      $$emit$$"JP,u,s   $labl\n\t"
12555      $$emit$$"J$cop,u,s   $labl"
12556    } else {
12557      $$emit$$"JP,u,s   done\n\t"
12558      $$emit$$"J$cop,u,s  $labl\n\t"
12559      $$emit$$"done:"
12560    }
12561  %}
12562  size(4);
12563  ins_encode %{
12564    Label* l = $labl$$label;
12565    if ($cop$$cmpcode == Assembler::notEqual) {
12566      __ jccb(Assembler::parity, *l);
12567      __ jccb(Assembler::notEqual, *l);
12568    } else if ($cop$$cmpcode == Assembler::equal) {
12569      Label done;
12570      __ jccb(Assembler::parity, done);
12571      __ jccb(Assembler::equal, *l);
12572      __ bind(done);
12573    } else {
12574       ShouldNotReachHere();
12575    }
12576  %}
12577  ins_pipe(pipe_jcc);
12578  ins_short_branch(1);
12579%}
12580
12581// ============================================================================
12582// Long Compare
12583//
12584// Currently we hold longs in 2 registers.  Comparing such values efficiently
12585// is tricky.  The flavor of compare used depends on whether we are testing
12586// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12587// The GE test is the negated LT test.  The LE test can be had by commuting
12588// the operands (yielding a GE test) and then negating; negate again for the
12589// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12590// NE test is negated from that.
12591
12592// Due to a shortcoming in the ADLC, it mixes up expressions like:
12593// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12594// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12595// are collapsed internally in the ADLC's dfa-gen code.  The match for
12596// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12597// foo match ends up with the wrong leaf.  One fix is to not match both
12598// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12599// both forms beat the trinary form of long-compare and both are very useful
12600// on Intel which has so few registers.
12601
12602// Manifest a CmpL result in an integer register.  Very painful.
12603// This is the test to avoid.
// Three-way long compare (CmpL3): leaves -1, 0 or +1 in dst.
// dst starts at zero. The high words are compared as signed values first
// (less / greater); only when they are equal are the low words compared, as
// unsigned values (below). The m_one path decrements dst, the p_one path
// increments it, and equality jumps straight to done. Flags are KILLed.
12604instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12605  match(Set dst (CmpL3 src1 src2));
12606  effect( KILL flags );
12607  ins_cost(1000);
12608  format %{ "XOR    $dst,$dst\n\t"
12609            "CMP    $src1.hi,$src2.hi\n\t"
12610            "JLT,s  m_one\n\t"
12611            "JGT,s  p_one\n\t"
12612            "CMP    $src1.lo,$src2.lo\n\t"
12613            "JB,s   m_one\n\t"
12614            "JEQ,s  done\n"
12615    "p_one:\tINC    $dst\n\t"
12616            "JMP,s  done\n"
12617    "m_one:\tDEC    $dst\n"
12618     "done:" %}
12619  ins_encode %{
12620    Label p_one, m_one, done;
12621    __ xorptr($dst$$Register, $dst$$Register);
12622    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12623    __ jccb(Assembler::less,    m_one);
12624    __ jccb(Assembler::greater, p_one);
12625    __ cmpl($src1$$Register, $src2$$Register);
12626    __ jccb(Assembler::below,   m_one);
12627    __ jccb(Assembler::equal,   done);
12628    __ bind(p_one);
12629    __ incrementl($dst$$Register);
12630    __ jmpb(done);
12631    __ bind(m_one);
12632    __ decrementl($dst$$Register);
12633    __ bind(done);
12634  %}
12635  ins_pipe( pipe_slow );
12636%}
12637
12638//======
// Signed long compare against zero, result left in the normal flags.
// The flags are only valid for LT or GE tests (reverse the arguments to
// express LE or GT).  NOT GOOD FOR EQ/NE tests.
// Per the format, only the high word is TESTed against itself.
instruct cmpL_zero_flags_LTGE(flagsReg_long_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpL src zero));
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  opcode(0x85);
  ins_encode(OpcP, RegReg_Hi2(src, src));
  ins_pipe(ialu_cr_reg_reg);
%}
12650
// Signed long compare of two registers, result left in the normal flags.
// The flags are only valid for LT or GE tests (reverse the arguments to
// express LE or GT).  NOT GOOD FOR EQ/NE tests.
// Per the format: CMP on the low words, then SBB of the high words through
// the scratch register $tmp.
instruct cmpL_reg_flags_LTGE(flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src1, src2, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12664
// Long compares reg < zero/reg OR reg >= zero/reg.
// Nothing but a normal conditional branch; the predicate limits the rule
// to Bool tests that are LT or GE.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt);
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JLT or JGE...
  %}
%}
12675
12676//======
// CmpUL against zero, result left in the normal flags.
// The flags are only valid for LT or GE tests (reverse the arguments to
// express LE or GT).  NOT GOOD FOR EQ/NE tests.
// Per the format, only the high word is TESTed against itself.
instruct cmpUL_zero_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src, immL0 zero) %{
  match(Set flags (CmpUL src zero));
  format %{ "TEST   $src.hi,$src.hi" %}
  ins_cost(100);
  opcode(0x85);
  ins_encode( OpcP, RegReg_Hi2( src, src ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12688
// CmpUL of two registers, result left in the normal flags.
// The flags are only valid for LT or GE tests (reverse the arguments to
// express LE or GT).  NOT GOOD FOR EQ/NE tests.
// Per the format: CMP on the low words, then SBB of the high words through
// the scratch register $tmp.
instruct cmpUL_reg_flags_LTGE(flagsReg_ulong_LTGE flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12702
// Unsigned long compares reg < zero/reg OR reg >= zero/reg.
// Nothing but a normal conditional branch (unsigned cmpOpU); the predicate
// limits the rule to Bool tests that are LT or GE.
instruct cmpUL_LTGE(cmpOpU cmp, flagsReg_ulong_LTGE flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
12713
// Compare 2 longs and CMOVE longs (register source).
// Requires hardware CMOV and an LT or GE test.  The 64-bit move is done as
// two CMOVs, one for the low and one for the high word.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12725
// Compare 2 longs and CMOVE a long loaded from memory (LoadL folded in).
// Requires hardware CMOV and an LT or GE test; low and high words are
// moved by two separate CMOVs.
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12736
// Compare 2 longs and CMOVE ints (register source).
// Requires hardware CMOV and an LT or GE test.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12747
// Compare 2 longs and CMOVE an int loaded from memory (LoadI folded in).
// Requires hardware CMOV and an LT or GE test.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
12757
// Compare 2 longs and CMOVE ptrs.
// Requires hardware CMOV and an LT or GE test.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12768
// Compare 2 longs and CMOVE doubles (regDPR operands, selected when
// UseSSE<=1).  Valid for LT or GE tests only; expands to fcmovDPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
12778
// Compare 2 longs and CMOVE doubles (regD operands, selected when
// UseSSE>=2).  Valid for LT or GE tests only; expands to fcmovD_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12788
// Compare 2 longs and CMOVE floats (regFPR operands, selected when
// UseSSE==0).  Valid for LT or GE tests only; expands to fcmovFPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
12797
// Compare 2 longs and CMOVE floats (regF operands, selected when
// UseSSE>=1).  Valid for LT or GE tests only; expands to fcmovF_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12806
12807//======
// Long compare against zero, result left in the normal flags.
// The flags are only valid for EQ/NE tests.
// Per the format, the two halves are ORed together in the scratch $tmp.
instruct cmpL_zero_flags_EQNE(flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  ins_cost(200);
  effect(TEMP tmp);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
  ins_encode(long_cmp_flags0(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
12818
// Long compare of two registers, result left in the normal flags.
// The flags are only valid for EQ/NE tests.
// Per the format: compare the low words and, only when they are equal,
// go on to compare the high words.
instruct cmpL_reg_flags_EQNE(flagsReg_long_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpL src1 src2));
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_cost(200+300);
  ins_encode(long_cmp_flags1(src1, src2));
  ins_pipe(ialu_cr_reg_reg);
%}
12830
// Long compare reg == zero/reg OR reg != zero/reg.
// Nothing but a normal conditional branch; the predicate limits the rule
// to Bool tests that are EQ or NE.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq);
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp, flags, labl);    // JEQ or JNE...
  %}
%}
12841
12842//======
// CmpUL against zero, result left in the normal flags.
// The flags are only valid for EQ/NE tests.
// Per the format, the two halves are ORed together in the scratch $tmp.
instruct cmpUL_zero_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  ins_cost(200);
  effect(TEMP tmp);
  format %{ "MOV    $tmp,$src.lo\n\t"
            "OR     $tmp,$src.hi\t! Unsigned long is EQ/NE 0?" %}
  ins_encode( long_cmp_flags0( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
12853
// CmpUL of two registers, result left in the normal flags.
// The flags are only valid for EQ/NE tests.
// Per the format: compare the low words and, only when they are equal,
// go on to compare the high words.
instruct cmpUL_reg_flags_EQNE(flagsReg_ulong_EQNE flags, eRegL src1, eRegL src2) %{
  match(Set flags (CmpUL src1 src2));
  format %{ "CMP    $src1.lo,$src2.lo\t! Unsigned long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_cost(200+300);
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12865
// Unsigned long compare reg == zero/reg OR reg != zero/reg.
// Nothing but a normal conditional branch (unsigned cmpOpU); the predicate
// limits the rule to Bool tests that are EQ or NE.
instruct cmpUL_EQNE(cmpOpU cmp, flagsReg_ulong_EQNE flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq );
  match(If cmp flags);
  effect(USE labl);
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
12876
// Compare 2 longs and CMOVE longs (register source).
// Requires hardware CMOV and an EQ or NE test.  The 64-bit move is done as
// two CMOVs, one for the low and one for the high word.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12888
// Compare 2 longs and CMOVE a long loaded from memory (LoadL folded in).
// Requires hardware CMOV and an EQ or NE test; low and high words are
// moved by two separate CMOVs.
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
12899
// Compare 2 longs and CMOVE ints (register source).
// Requires hardware CMOV and an EQ or NE test.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12910
// Compare 2 longs and CMOVE an int loaded from memory (LoadI folded in).
// Requires hardware CMOV and an EQ or NE test.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
12920
// Compare 2 longs and CMOVE ptrs.
// Requires hardware CMOV and an EQ or NE test.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
12931
// Compare 2 longs and CMOVE doubles (regDPR operands, selected when
// UseSSE<=1).  Valid for EQ or NE tests only; expands to fcmovDPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
12941
// Compare 2 longs and CMOVE doubles (regD operands, selected when
// UseSSE>=2).  Valid for EQ or NE tests only; expands to fcmovD_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12951
// Compare 2 longs and CMOVE floats (regFPR operands, selected when
// UseSSE==0).  Valid for EQ or NE tests only; expands to fcmovFPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
12960
// Compare 2 longs and CMOVE floats (regF operands, selected when
// UseSSE>=1).  Valid for EQ or NE tests only; expands to fcmovF_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12969
12970//======
// Signed long compare against zero, result left in the normal flags.
// The flags are only valid for LE or GT tests, and only together with a
// commuted test.  Same idea as cmpL_reg_flags_LEGT, except that src must be
// negated: per the format, $src is subtracted from a zeroed $tmp.
instruct cmpL_zero_flags_LEGT(flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpL src zero));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode(long_cmp_flags3(src, tmp));
  ins_pipe(ialu_reg_reg_long);
%}
12983
// Signed long compare of two registers, result left in the normal flags.
// The flags are only valid for LE or GT tests.  This is
// cmpL_reg_flags_LTGE with the operands swapped (note src2 before src1 in
// the encoding), so the consumer must use a commuted test.
instruct cmpL_reg_flags_LEGT(flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode(long_cmp_flags2(src2, src1, tmp));
  ins_pipe(ialu_cr_reg_reg);
%}
12997
// Long compares reg <= zero/reg OR reg > zero/reg.
// Nothing but a normal conditional branch taking a commuted compare
// operand (cmpOp_commute); the predicate limits the rule to Bool tests
// that are GT or LE.
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  predicate(_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt);
  match(If cmp flags);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp, flags, labl);    // JGT or JLE...
  %}
%}
13009
13010//======
// CmpUL against zero, result left in the normal flags.
// The flags are only valid for LE or GT tests, and only together with a
// commuted test.  Same idea as cmpUL_reg_flags_LEGT, except that src must
// be negated: per the format, $src is subtracted from a zeroed $tmp.
instruct cmpUL_zero_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src, immL0 zero, rRegI tmp) %{
  match(Set flags (CmpUL src zero));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "XOR    $tmp,$tmp\t# Unsigned long compare for -$src < 0, use commuted test\n\t"
            "CMP    $tmp,$src.lo\n\t"
            "SBB    $tmp,$src.hi\n\t" %}
  ins_encode( long_cmp_flags3( src, tmp ) );
  ins_pipe( ialu_reg_reg_long );
%}
13023
// CmpUL of two registers, result left in the normal flags.
// The flags are only valid for LE or GT tests.  This is
// cmpUL_reg_flags_LTGE with the operands swapped (note src2 before src1 in
// the encoding), so the consumer must use a commuted test.
instruct cmpUL_reg_flags_LEGT(flagsReg_ulong_LEGT flags, eRegL src1, eRegL src2, rRegI tmp) %{
  match(Set flags (CmpUL src1 src2));
  ins_cost(300);
  effect(TEMP tmp);
  format %{ "CMP    $src2.lo,$src1.lo\t! Unsigned long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for unsigned long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
13037
// Unsigned long compares reg <= zero/reg OR reg > zero/reg.
// Nothing but a normal conditional branch taking a commuted unsigned
// compare operand (cmpOpU_commute); the predicate limits the rule to Bool
// tests that are GT or LE.
instruct cmpUL_LEGT(cmpOpU_commute cmp, flagsReg_ulong_LEGT flags, label labl) %{
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(If cmp flags);
  effect(USE labl);
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
13049
// Compare 2 longs and CMOVE longs (register source).
// Requires hardware CMOV and an LE or GT test; takes the commuted compare
// operand.  The 64-bit move is done as two CMOVs, low word then high word.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg_Lo2(dst, src), enc_cmov(cmp), RegReg_Hi2(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13061
// Compare 2 longs and CMOVE a long loaded from memory (LoadL folded in).
// Requires hardware CMOV and an LE or GT test; takes the commuted compare
// operand.  Low and high words are moved by two separate CMOVs.
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src));
  ins_pipe(pipe_cmov_reg_long);
%}
13072
// Compare 2 longs and CMOVE ints (register source).
// Requires hardware CMOV and an LE or GT test; takes the commuted compare
// operand.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13083
// Compare 2 longs and CMOVE an int loaded from memory (LoadI folded in).
// Requires hardware CMOV and an LE or GT test; takes the commuted compare
// operand.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegMem(dst, src));
  ins_pipe(pipe_cmov_mem);
%}
13093
// Compare 2 longs and CMOVE ptrs.
// Requires hardware CMOV and an LE or GT test; takes the commuted compare
// operand.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && (_kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F, 0x40);
  ins_encode(enc_cmov(cmp), RegReg(dst, src));
  ins_pipe(pipe_cmov_reg);
%}
13104
// Compare 2 longs and CMOVE doubles (regDPR operands, selected when
// UseSSE<=1).  Valid for LE or GT tests only, with the commuted compare
// operand; expands to fcmovDPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
  predicate( UseSSE<=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovDPR_regS(cmp,flags,dst,src);
  %}
%}
13114
// Compare 2 longs and CMOVE doubles (regD operands, selected when
// UseSSE>=2).  Valid for LE or GT tests only, with the commuted compare
// operand; expands to fcmovD_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
13124
// Compare 2 longs and CMOVE floats (regFPR operands, selected when
// UseSSE==0).  Valid for LE or GT tests only, with the commuted compare
// operand; expands to fcmovFPR_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovFPR_regS(cmp,flags,dst,src);
  %}
%}
13133
13134
// Compare 2 longs and CMOVE floats (regF operands, selected when
// UseSSE>=1).  Valid for LE or GT tests only, with the commuted compare
// operand; expands to fcmovF_regS.
// The two Bool tests are grouped explicitly: '&&' binds tighter than '||',
// so the UseSSE guard has to wrap the whole disjunction to apply to both.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13143
13144
13145// ============================================================================
13146// Procedure Call/Return Instructions
// Static Java call (matches CallStaticJava).
// Note: the emitted sequence is mirrored by ret_addr_offset() and
//       compute_padding(); any change here must be reflected there.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);
  ins_cost(300);

  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_Static_Call(meth), call_epilog, post_call_FPU);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
13164
// Dynamic Java call (matches CallDynamicJava).
// Note: the emitted sequence is mirrored by ret_addr_offset() and
//       compute_padding(); any change here must be reflected there.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);
  ins_cost(300);

  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, Java_Dynamic_Call(meth), call_epilog, post_call_FPU);
  ins_pipe(pipe_slow);
  ins_alignment(4);
%}
13183
// Call into the runtime (matches CallRuntime).
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime);
  effect(USE meth);
  ins_cost(300);

  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */
  // FFree_Float_Stack_All: use FFREEs to clear entries in float stack
  ins_encode(pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime(meth), post_call_FPU);
  ins_pipe(pipe_slow);
%}
13199
// Call runtime without safepoint (matches CallLeaf).
// Same encoding sequence as CallRuntimeDirect, with Verify_FPU_For_Leaf
// added after the call.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);
  ins_cost(300);

  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode(pre_call_resets, FFree_Float_Stack_All, Java_To_Runtime(meth), Verify_FPU_For_Leaf, post_call_FPU);
  ins_pipe(pipe_slow);
%}
13214
// Leaf runtime call for CallLeafNoFP.  Unlike CallLeafDirect, the encoding
// contains no FFree_Float_Stack_All, Verify_FPU_For_Leaf or post_call_FPU.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);
  ins_cost(300);

  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */
  ins_encode( pre_call_resets, Java_To_Runtime( meth ) );
  ins_pipe(pipe_slow);
%}
13225
13226
// Return Instruction
// Pops the return address and jumps to it (single-byte opcode 0xC3).
instruct Ret() %{
  match(Return);
  opcode(0xC3);
  format %{ "RET" %}
  ins_encode( OpcP );
  ins_pipe(pipe_jmp);
%}
13236
// Tail Call: an 'interprocedural jump' from a runtime stub to Java code.
// The jump target will eventually return to the caller, so the return
// address is left in place here (TailJump, below, removes it).
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop);
  ins_cost(300);
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  ins_encode(OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}
13249
13250
// Tail Jump: discard the return address, then jump to the target.
// (TailCall, above, leaves the return address around.)
// Per the format, the return address is popped into EDX as a dummy.
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match(TailJump jump_target ex_oop);
  ins_cost(300);
  opcode(0xFF, 0x4);  /* Opcode FF /4 */
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  ins_encode(enc_pop_rdx, OpcP, RegOpc(jump_target));
  ins_pipe(pipe_jmp);
%}
13263
// Create exception oop: the oop is produced by stack-crawling runtime code
// and is already set up (in EAX, per the operand class) just before control
// reaches this handler.  Zero-size instruction: no code is emitted.
instruct CreateException(eAXRegP ex_oop) %{
  match(Set ex_oop (CreateEx));

  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  size(0);
  ins_encode();
  ins_pipe(empty);
%}
13277
13278
// Rethrow exception:
// The exception oop arrives in the first argument position; control is
// transferred to the rethrow stub with a JUMP, not a call.
instruct RethrowException() %{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode( enc_rethrow );
  ins_pipe(pipe_jmp);
%}
13291
13292// inlined locking and unlocking
13293
// Inlined monitor enter, RTM flavour: chosen when the current compilation
// uses RTM.  The outcome is reported in the flags register $cr.  $box is
// consumed and clobbered; $tmp, $scr, $cx1 and $cx2 are scratch.
instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
  match(Set cr (FastLock object box));
  predicate(Compile::current()->use_rtm());
  ins_cost(300);
  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
  ins_encode %{
    Register obj_reg  = $object$$Register;
    Register box_reg  = $box$$Register;
    Register tmp_reg  = $tmp$$Register;
    Register scr_reg  = $scr$$Register;
    Register cx1_reg  = $cx1$$Register;
    Register cx2_reg  = $cx2$$Register;
    // use_rtm is passed as true; the method's method_data() and the
    // compile's profile_rtm() setting are handed to fast_lock as well.
    __ fast_lock(obj_reg, box_reg, tmp_reg,
                 scr_reg, cx1_reg, cx2_reg,
                 _counters, _rtm_counters, _stack_rtm_counters,
                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
                 true, ra_->C->profile_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13309
// Inlined monitor enter, non-RTM flavour: chosen when the current
// compilation does not use RTM.  The outcome is reported in the flags
// register $cr.  $box is consumed and clobbered; $tmp and $scr are scratch.
instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
  match(Set cr (FastLock object box));
  predicate(!Compile::current()->use_rtm());
  ins_cost(300);
  effect(TEMP tmp, TEMP scr, USE_KILL box);
  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
  ins_encode %{
    Register obj_reg = $object$$Register;
    Register box_reg = $box$$Register;
    Register tmp_reg = $tmp$$Register;
    Register scr_reg = $scr$$Register;
    // No RTM: the extra scratch registers are noreg, the RTM counter and
    // method-data arguments are NULL, and both RTM flags are false.
    __ fast_lock(obj_reg, box_reg, tmp_reg,
                 scr_reg, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
  %}
  ins_pipe(pipe_slow);
%}
13322
// Inlined monitor exit.  The outcome is reported in the flags register $cr.
// $box is consumed and clobbered; $tmp is scratch.  Whether RTM is in use
// is taken from the compile at code-emission time.
instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp) %{
  match(Set cr (FastUnlock object box));
  ins_cost(300);
  effect(TEMP tmp, USE_KILL box);
  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
  ins_encode %{
    Register obj_reg = $object$$Register;
    Register box_reg = $box$$Register;
    Register tmp_reg = $tmp$$Register;
    __ fast_unlock(obj_reg, box_reg, tmp_reg, ra_->C->use_rtm());
  %}
  ins_pipe(pipe_slow);
%}
13333
13334
13335
13336// ============================================================================
13337// Safepoint Instruction
// Matches the ideal SafePoint node.  The only operand is the flags register,
// which the instruction kills; the emitted bytes come from the
// Safepoint_Poll() encoding class.
13338instruct safePoint_poll(eFlagsReg cr) %{
13339  match(SafePoint);
13340  effect(KILL cr);
13341
13342  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13343  // On SPARC that might be acceptable as we can generate the address with
13344  // just a sethi, saving an or.  By polling at offset 0 we can end up
13345  // putting additional pressure on the index-0 in the D$.  Because of
13346  // alignment (just like the situation at hand) the lower indices tend
13347  // to see more traffic.  It'd be better to change the polling address
13348  // to offset 0 of the last $line in the polling page.
13349
13350  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13351  ins_cost(125);
  // Declared fixed size of the instruction.
  // NOTE(review): presumably 6 bytes and must agree with what
  // Safepoint_Poll() actually emits -- confirm against the enc_class.
13352  size(6) ;
13353  ins_encode( Safepoint_Poll() );
13354  ins_pipe( ialu_reg_mem );
13355%}
13356
13357
13358// ============================================================================
13359// This name is KNOWN by the ADLC and cannot be changed.
13360// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13361// for this guy.
// Matches the ideal ThreadLocal node and materializes its value in `dst` by
// calling the assembler's get_thread() on the destination register.
13362instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13363  match(Set dst (ThreadLocal));
  // `dst` is defined by this instruction; the flags register is killed.
13364  effect(DEF dst, KILL cr);
13365
13366  format %{ "MOV    $dst, Thread::current()" %}
13367  ins_encode %{
    // Convert the allocated register encoding into a Register for the call.
13368    Register dstReg = as_Register($dst$$reg);
13369    __ get_thread(dstReg);
13370  %}
13371  ins_pipe( ialu_reg_fat );
13372%}
13373
13374
13375
13376//----------PEEPHOLE RULES-----------------------------------------------------
13377// These must follow all instruction definitions as they use the names
13378// defined in the instructions definitions.
13379//
13380// peepmatch ( root_instr_name [preceding_instruction]* );
13381//
13382// peepconstraint %{
13383// (instruction_number.operand_name relational_op instruction_number.operand_name
13384//  [, ...] );
13385// // instruction numbers are zero-based using left to right order in peepmatch
13386//
13387// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13388// // provide an instruction_number.operand_name for each operand that appears
13389// // in the replacement instruction's match rule
13390//
13391// ---------VM FLAGS---------------------------------------------------------
13392//
13393// All peephole optimizations can be turned off using -XX:-OptoPeephole
13394//
13395// Each peephole rule is given an identifying number starting with zero and
13396// increasing by one in the order seen by the parser.  An individual peephole
13397// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13398// on the command-line.
13399//
13400// ---------CURRENT LIMITATIONS----------------------------------------------
13401//
13402// Only match adjacent instructions in same basic block
13403// Only equality constraints
13404// Only constraints between operands, not (0.dest_reg == EAX_enc)
13405// Only one replacement instruction
13406//
13407// ---------EXAMPLE----------------------------------------------------------
13408//
13409// // pertinent parts of existing instructions in architecture description
13410// instruct movI(rRegI dst, rRegI src) %{
13411//   match(Set dst (CopyI src));
13412// %}
13413//
13414// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13415//   match(Set dst (AddI dst src));
13416//   effect(KILL cr);
13417// %}
13418//
13419// // Change (inc mov) to lea
13420// peephole %{
13421//   // increment preceded by register-register move
13422//   peepmatch ( incI_eReg movI );
13423//   // require that the destination register of the increment
13424//   // match the destination register of the move
13425//   peepconstraint ( 0.dst == 1.dst );
13426//   // construct a replacement instruction that sets
13427//   // the destination to ( move's source register + one )
13428//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13429// %}
13430//
13431// Implementation no longer uses movX instructions since
13432// machine-independent system no longer uses CopyX nodes.
13433//
13434// peephole %{
13435//   peepmatch ( incI_eReg movI );
13436//   peepconstraint ( 0.dst == 1.dst );
13437//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13438// %}
13439//
13440// peephole %{
13441//   peepmatch ( decI_eReg movI );
13442//   peepconstraint ( 0.dst == 1.dst );
13443//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13444// %}
13445//
13446// peephole %{
13447//   peepmatch ( addI_eReg_imm movI );
13448//   peepconstraint ( 0.dst == 1.dst );
13449//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13450// %}
13451//
13452// peephole %{
13453//   peepmatch ( addP_eReg_imm movP );
13454//   peepconstraint ( 0.dst == 1.dst );
13455//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13456// %}
13457
13458// // Change load of spilled value to only a spill
13459// instruct storeI(memory mem, rRegI src) %{
13460//   match(Set mem (StoreI mem src));
13461// %}
13462//
13463// instruct loadI(rRegI dst, memory mem) %{
13464//   match(Set dst (LoadI mem));
13465// %}
13466//
// Active peephole rule: change load of spilled value to only a spill.
// Instruction 0 is the root loadI and instruction 1 is the storeI that
// precedes it (numbering is zero-based, left to right in peepmatch).
13467peephole %{
13468  peepmatch ( loadI storeI );
  // The load must reload the register that was just stored (1.src == 0.dst)
  // and must use the same memory operand as the store (1.mem == 0.mem).
13469  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  // Replace the pair with a single storeI built from the store's operands.
13470  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13471%}
13472
13473//----------SMARTSPILL RULES---------------------------------------------------
13474// These must follow all instruction definitions as they use the names
13475// defined in the instructions definitions.
13476