x86_32.ad revision 3142:9b8ce46870df
1//
2// Copyright (c) 1997, 2012, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// archtecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// Previously set EBX, ESI, and EDI as save-on-entry for java code
64// Turn off SOE in java-code due to frequent use of uncommon-traps.
65// Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
77// Special Registers
78reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());
79
80// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
81// allocator, and only shows up in the encodings.
82reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
83reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
84// Ok so here's the trick FPR1 is really st(0) except in the midst
85// of emission of assembly for a machnode. During the emission the fpu stack
86// is pushed making FPR1 == st(1) temporarily. However at any safepoint
87// the stack will not have this element so FPR1 == st(0) from the
88// oopMap viewpoint. This same weirdness with numbering causes
89// instruction encoding to have to play games with the register
90// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
91// where it does flt->flt moves to see an example
92//
93reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
94reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
95reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
96reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
97reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
98reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
99reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
100reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
101reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
102reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
103reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
104reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
105reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
106reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
107
108// XMM registers.  128-bit registers or 4 words each, labeled a-d.
109// Word a in each register holds a Float, words ab hold a Double.
110// We currently do not use the SIMD capabilities, so registers cd
111// are unused at the moment.
112reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
113reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
114reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
115reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
116reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
117reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
118reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
119reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
120reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
121reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
122reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
123reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
124reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
125reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
126reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
127reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());
128
129// Specify priority of register selection within phases of register
130// allocation.  Highest priority is first.  A useful heuristic is to
131// give registers a low priority when they are required by machine
132// instructions, like EAX and EDX.  Registers which are used as
133// pairs must fall on an even boundary (witness the FPR#L's in this list).
134// For the Intel integer registers, the equivalent Long pairs are
135// EDX:EAX, EBX:ECX, and EDI:EBP.
136alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
137                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
138                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
139                    FPR6L, FPR6H, FPR7L, FPR7H );
140
141alloc_class chunk1( XMM0a, XMM0b,
142                    XMM1a, XMM1b,
143                    XMM2a, XMM2b,
144                    XMM3a, XMM3b,
145                    XMM4a, XMM4b,
146                    XMM5a, XMM5b,
147                    XMM6a, XMM6b,
148                    XMM7a, XMM7b, EFLAGS);
149
150
151//----------Architecture Description Register Classes--------------------------
152// Several register classes are automatically defined based upon information in
153// this architecture description.
154// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
155// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
156// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
157// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
158//
159// Class for all registers
160reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
161// Class for general registers
162reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
163// Class for general registers which may be used for implicit null checks on win95
164// Also safe for use by tailjump. We don't want to allocate in rbp,
165reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
166// Class of "X" registers
167reg_class x_reg(EBX, ECX, EDX, EAX);
168// Class of registers that can appear in an address with no offset.
169// EBP and ESP require an extra instruction byte for zero offset.
170// Used in fast-unlock
171reg_class p_reg(EDX, EDI, ESI, EBX);
172// Class for general registers not including ECX
173reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
174// Class for general registers not including EAX
175reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
176// Class for general registers not including EAX or EBX.
177reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
178// Class of EAX (for multiply and divide operations)
179reg_class eax_reg(EAX);
180// Class of EBX (for atomic add)
181reg_class ebx_reg(EBX);
182// Class of ECX (for shift and JCXZ operations and cmpLTMask)
183reg_class ecx_reg(ECX);
184// Class of EDX (for multiply and divide operations)
185reg_class edx_reg(EDX);
186// Class of EDI (for synchronization)
187reg_class edi_reg(EDI);
188// Class of ESI (for synchronization)
189reg_class esi_reg(ESI);
190// Singleton class for interpreter's stack pointer
191reg_class ebp_reg(EBP);
192// Singleton class for stack pointer
193reg_class sp_reg(ESP);
194// Singleton class for instruction pointer
195// reg_class ip_reg(EIP);
196// Singleton class for condition codes
197reg_class int_flags(EFLAGS);
198// Class of integer register pairs
199reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
200// Class of integer register pairs that aligns with calling convention
201reg_class eadx_reg( EAX,EDX );
202reg_class ebcx_reg( ECX,EBX );
203// Not AX or DX, used in divides
204reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );
205
206// Floating point registers.  Notice FPR0 is not a choice.
207// FPR0 is not ever allocated; we use clever encodings to fake
208// a 2-address instructions out of Intels FP stack.
209reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
210
211// make a register class for SSE registers
212reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);
213
214// make a double register class for SSE2 registers
215reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
216                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );
217
218reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
219                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
220                   FPR7L,FPR7H );
221
222reg_class flt_reg0( FPR1L );
223reg_class dbl_reg0( FPR1L,FPR1H );
224reg_class dbl_reg1( FPR2L,FPR2H );
225reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
226                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
227
228// XMM6 and XMM7 could be used as temporary registers for long, float and
229// double values for SSE2.
230reg_class xdb_reg6( XMM6a,XMM6b );
231reg_class xdb_reg7( XMM7a,XMM7b );
232%}
233
234
235//----------SOURCE BLOCK-------------------------------------------------------
236// This is a block of C++ code which provides values, functions, and
237// definitions necessary in the rest of the architecture description
238source_hpp %{
239// Must be visible to the DFA in dfa_x86_32.cpp
240extern bool is_operand_hi32_zero(Node* n);
241%}
242
243source %{
244#define   RELOC_IMM32    Assembler::imm_operand
245#define   RELOC_DISP32   Assembler::disp32_operand
246
247#define __ _masm.
248
249// How to find the high register of a Long pair, given the low register
250#define   HIGH_FROM_LOW(x) ((x)+2)
251
252// These masks are used to provide 128-bit aligned bitmasks to the XMM
253// instructions, to allow sign-masking or sign-bit flipping.  They allow
254// fast versions of NegF/NegD and AbsF/AbsD.
255
256// Note: 'double' and 'long long' have 32-bits alignment on x86.
257static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
258  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
259  // of 128-bits operands for SSE instructions.
260  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
261  // Store the value to a 128-bits operand.
262  operand[0] = lo;
263  operand[1] = hi;
264  return operand;
265}
266
267// Buffer for 128-bits masks used by SSE instructions.
268static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
269
270// Static initialization during VM startup.
271static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
272static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
273static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
274static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
275
276// Offset hacking within calls.
277static int pre_call_FPU_size() {
278  if (Compile::current()->in_24_bit_fp_mode())
279    return 6; // fldcw
280  return 0;
281}
282
283static int preserve_SP_size() {
284  return 2;  // op, rm(reg/reg)
285}
286
287// !!!!! Special hack to get all type of calls to specify the byte offset
288//       from the start of the call to the point where the return address
289//       will point.
290int MachCallStaticJavaNode::ret_addr_offset() {
291  int offset = 5 + pre_call_FPU_size();  // 5 bytes from start of call to where return address points
292  if (_method_handle_invoke)
293    offset += preserve_SP_size();
294  return offset;
295}
296
297int MachCallDynamicJavaNode::ret_addr_offset() {
298  return 10 + pre_call_FPU_size();  // 10 bytes from start of call to where return address points
299}
300
301static int sizeof_FFree_Float_Stack_All = -1;
302
303int MachCallRuntimeNode::ret_addr_offset() {
304  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
305  return sizeof_FFree_Float_Stack_All + 5 + pre_call_FPU_size();
306}
307
308// Indicate if the safepoint node needs the polling page as an input.
309// Since x86 does have absolute addressing, it doesn't.
310bool SafePointNode::needs_polling_address_input() {
311  return false;
312}
313
314//
315// Compute padding required for nodes which need alignment
316//
317
318// The address of the call instruction needs to be 4-byte aligned to
319// ensure that it does not span a cache line so that it can be patched.
320int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
321  current_offset += pre_call_FPU_size();  // skip fldcw, if any
322  current_offset += 1;      // skip call opcode byte
323  return round_to(current_offset, alignment_required()) - current_offset;
324}
325
326// The address of the call instruction needs to be 4-byte aligned to
327// ensure that it does not span a cache line so that it can be patched.
328int CallStaticJavaHandleNode::compute_padding(int current_offset) const {
329  current_offset += pre_call_FPU_size();  // skip fldcw, if any
330  current_offset += preserve_SP_size();   // skip mov rbp, rsp
331  current_offset += 1;      // skip call opcode byte
332  return round_to(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_FPU_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return round_to(current_offset, alignment_required()) - current_offset;
342}
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(oop(d32)->is_oop() && (ScavengeRootsInCode || !oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
414
415   // eRegI ereg, memory mem) %{    // emit_reg_mem
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && !(displace_is_oop) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( displace_is_oop ) {
435            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( displace_is_oop ) {
443            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && !(displace_is_oop) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( displace_is_oop ) {
473          emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
531  // Empty encoding
532}
533
534uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
535  return 0;
536}
537
538#ifndef PRODUCT
539void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
540  st->print("# MachConstantBaseNode (empty encoding)");
541}
542#endif
543
544
545//=============================================================================
546#ifndef PRODUCT
547void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
548  Compile* C = ra_->C;
549
550  int framesize = C->frame_slots() << LogBytesPerInt;
551  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
552  // Remove wordSize for return addr which is already pushed.
553  framesize -= wordSize;
554
555  if (C->need_stack_bang(framesize)) {
556    framesize -= wordSize;
557    st->print("# stack bang");
558    st->print("\n\t");
559    st->print("PUSH   EBP\t# Save EBP");
560    if (framesize) {
561      st->print("\n\t");
562      st->print("SUB    ESP, #%d\t# Create frame",framesize);
563    }
564  } else {
565    st->print("SUB    ESP, #%d\t# Create frame",framesize);
566    st->print("\n\t");
567    framesize -= wordSize;
568    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
569  }
570
571  if (VerifyStackAtCalls) {
572    st->print("\n\t");
573    framesize -= wordSize;
574    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
575  }
576
577  if( C->in_24_bit_fp_mode() ) {
578    st->print("\n\t");
579    st->print("FLDCW  \t# load 24 bit fpu control word");
580  }
581  if (UseSSE >= 2 && VerifyFPU) {
582    st->print("\n\t");
583    st->print("# verify FPU stack (must be clean on entry)");
584  }
585
586#ifdef ASSERT
587  if (VerifyStackAtCalls) {
588    st->print("\n\t");
589    st->print("# stack alignment check");
590  }
591#endif
592  st->cr();
593}
594#endif
595
596
597void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
598  Compile* C = ra_->C;
599  MacroAssembler _masm(&cbuf);
600
601  int framesize = C->frame_slots() << LogBytesPerInt;
602
603  __ verified_entry(framesize, C->need_stack_bang(framesize), C->in_24_bit_fp_mode());
604
605  C->set_frame_complete(cbuf.insts_size());
606
607  if (C->has_mach_constant_base_node()) {
608    // NOTE: We set the table base offset here because users might be
609    // emitted before MachConstantBaseNode.
610    Compile::ConstantTable& constant_table = C->constant_table();
611    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
612  }
613}
614
615uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
616  return MachNode::size(ra_); // too many variables; just compute it the hard way
617}
618
619int MachPrologNode::reloc() const {
620  return 0; // a large enough number
621}
622
623//=============================================================================
624#ifndef PRODUCT
625void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
626  Compile *C = ra_->C;
627  int framesize = C->frame_slots() << LogBytesPerInt;
628  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
629  // Remove two words for return addr and rbp,
630  framesize -= 2*wordSize;
631
632  if( C->in_24_bit_fp_mode() ) {
633    st->print("FLDCW  standard control word");
634    st->cr(); st->print("\t");
635  }
636  if( framesize ) {
637    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
638    st->cr(); st->print("\t");
639  }
640  st->print_cr("POPL   EBP"); st->print("\t");
641  if( do_polling() && C->is_method_compilation() ) {
642    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
643    st->cr(); st->print("\t");
644  }
645}
646#endif
647
648void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
649  Compile *C = ra_->C;
650
651  // If method set FPU control word, restore to standard control word
652  if( C->in_24_bit_fp_mode() ) {
653    MacroAssembler masm(&cbuf);
654    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
655  }
656
657  int framesize = C->frame_slots() << LogBytesPerInt;
658  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
659  // Remove two words for return addr and rbp,
660  framesize -= 2*wordSize;
661
662  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
663
664  if( framesize >= 128 ) {
665    emit_opcode(cbuf, 0x81); // add  SP, #framesize
666    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
667    emit_d32(cbuf, framesize);
668  }
669  else if( framesize ) {
670    emit_opcode(cbuf, 0x83); // add  SP, #framesize
671    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
672    emit_d8(cbuf, framesize);
673  }
674
675  emit_opcode(cbuf, 0x58 | EBP_enc);
676
677  if( do_polling() && C->is_method_compilation() ) {
678    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
679    emit_opcode(cbuf,0x85);
680    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
681    emit_d32(cbuf, (intptr_t)os::get_polling_page());
682  }
683}
684
685uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
686  Compile *C = ra_->C;
687  // If method set FPU control word, restore to standard control word
688  int size = C->in_24_bit_fp_mode() ? 6 : 0;
689  if( do_polling() && C->is_method_compilation() ) size += 6;
690
691  int framesize = C->frame_slots() << LogBytesPerInt;
692  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
693  // Remove two words for return addr and rbp,
694  framesize -= 2*wordSize;
695
696  size++; // popl rbp,
697
698  if( framesize >= 128 ) {
699    size += 6;
700  } else {
701    size += framesize ? 3 : 0;
702  }
703  return size;
704}
705
706int MachEpilogNode::reloc() const {
707  return 0; // a large enough number
708}
709
710const Pipeline * MachEpilogNode::pipeline() const {
711  return MachNode::pipeline_class();
712}
713
714int MachEpilogNode::safepoint_offset() const { return 0; }
715
716//=============================================================================
717
718enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
719static enum RC rc_class( OptoReg::Name reg ) {
720
721  if( !OptoReg::is_valid(reg)  ) return rc_bad;
722  if (OptoReg::is_stack(reg)) return rc_stack;
723
724  VMReg r = OptoReg::as_VMReg(reg);
725  if (r->is_Register()) return rc_int;
726  if (r->is_FloatRegister()) {
727    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
728    return rc_float;
729  }
730  assert(r->is_XMMRegister(), "must be");
731  return rc_xmm;
732}
733
734static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
735                        int opcode, const char *op_str, int size, outputStream* st ) {
736  if( cbuf ) {
737    emit_opcode  (*cbuf, opcode );
738    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
739#ifndef PRODUCT
740  } else if( !do_size ) {
741    if( size != 0 ) st->print("\n\t");
742    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
743      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
744      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
745    } else { // FLD, FST, PUSH, POP
746      st->print("%s [ESP + #%d]",op_str,offset);
747    }
748#endif
749  }
750  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
751  return size+3+offset_size;
752}
753
754// Helper for XMM registers.  Extra opcode bits, limited syntax.
755static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
756                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
757  if (cbuf) {
758    MacroAssembler _masm(cbuf);
759    if (reg_lo+1 == reg_hi) { // double move?
760      if (is_load) {
761        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
762      } else {
763        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
764      }
765    } else {
766      if (is_load) {
767        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
768      } else {
769        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
770      }
771    }
772#ifndef PRODUCT
773  } else if (!do_size) {
774    if (size != 0) st->print("\n\t");
775    if (reg_lo+1 == reg_hi) { // double move?
776      if (is_load) st->print("%s %s,[ESP + #%d]",
777                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
778                              Matcher::regName[reg_lo], offset);
779      else         st->print("MOVSD  [ESP + #%d],%s",
780                              offset, Matcher::regName[reg_lo]);
781    } else {
782      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
783                              Matcher::regName[reg_lo], offset);
784      else         st->print("MOVSS  [ESP + #%d],%s",
785                              offset, Matcher::regName[reg_lo]);
786    }
787#endif
788  }
789  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
790  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes.
791  return size+5+offset_size;
792}
793
794
795static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
796                            int src_hi, int dst_hi, int size, outputStream* st ) {
797  if (cbuf) {
798    MacroAssembler _masm(cbuf);
799    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
800      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
801                as_XMMRegister(Matcher::_regEncode[src_lo]));
802    } else {
803      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
804                as_XMMRegister(Matcher::_regEncode[src_lo]));
805    }
806#ifndef PRODUCT
807  } else if (!do_size) {
808    if (size != 0) st->print("\n\t");
809    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
810      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
811        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
812      } else {
813        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
814      }
815    } else {
816      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
817        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
818      } else {
819        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
820      }
821    }
822#endif
823  }
824  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes.
825  // Only MOVAPS SSE prefix uses 1 byte.
826  int sz = 4;
827  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
828      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
829  return size + sz;
830}
831
832static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
833                            int src_hi, int dst_hi, int size, outputStream* st ) {
834  // 32-bit
835  if (cbuf) {
836    MacroAssembler _masm(cbuf);
837    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
838             as_Register(Matcher::_regEncode[src_lo]));
839#ifndef PRODUCT
840  } else if (!do_size) {
841    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
842#endif
843  }
844  return 4;
845}
846
847
848static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
849                                 int src_hi, int dst_hi, int size, outputStream* st ) {
850  // 32-bit
851  if (cbuf) {
852    MacroAssembler _masm(cbuf);
853    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
854             as_XMMRegister(Matcher::_regEncode[src_lo]));
855#ifndef PRODUCT
856  } else if (!do_size) {
857    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
858#endif
859  }
860  return 4;
861}
862
863static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
864  if( cbuf ) {
865    emit_opcode(*cbuf, 0x8B );
866    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
867#ifndef PRODUCT
868  } else if( !do_size ) {
869    if( size != 0 ) st->print("\n\t");
870    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
871#endif
872  }
873  return size+2;
874}
875
876static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
877                                 int offset, int size, outputStream* st ) {
878  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
879    if( cbuf ) {
880      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
881      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
882#ifndef PRODUCT
883    } else if( !do_size ) {
884      if( size != 0 ) st->print("\n\t");
885      st->print("FLD    %s",Matcher::regName[src_lo]);
886#endif
887    }
888    size += 2;
889  }
890
891  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
892  const char *op_str;
893  int op;
894  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
895    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
896    op = 0xDD;
897  } else {                   // 32-bit store
898    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
899    op = 0xD9;
900    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
901  }
902
903  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
904}
905
906uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
907  // Get registers to move
908  OptoReg::Name src_second = ra_->get_reg_second(in(1));
909  OptoReg::Name src_first = ra_->get_reg_first(in(1));
910  OptoReg::Name dst_second = ra_->get_reg_second(this );
911  OptoReg::Name dst_first = ra_->get_reg_first(this );
912
913  enum RC src_second_rc = rc_class(src_second);
914  enum RC src_first_rc = rc_class(src_first);
915  enum RC dst_second_rc = rc_class(dst_second);
916  enum RC dst_first_rc = rc_class(dst_first);
917
918  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
919
920  // Generate spill code!
921  int size = 0;
922
923  if( src_first == dst_first && src_second == dst_second )
924    return size;            // Self copy, no move
925
926  // --------------------------------------
927  // Check for mem-mem move.  push/pop to move.
928  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
929    if( src_second == dst_first ) { // overlapping stack copy ranges
930      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
931      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
932      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
933      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
934    }
935    // move low bits
936    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
937    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
938    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
939      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
940      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
941    }
942    return size;
943  }
944
945  // --------------------------------------
946  // Check for integer reg-reg copy
947  if( src_first_rc == rc_int && dst_first_rc == rc_int )
948    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
949
950  // Check for integer store
951  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
952    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
953
954  // Check for integer load
955  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
956    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
957
958  // Check for integer reg-xmm reg copy
959  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
960    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
961            "no 64 bit integer-float reg moves" );
962    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
963  }
964  // --------------------------------------
965  // Check for float reg-reg copy
966  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
967    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
968            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
969    if( cbuf ) {
970
971      // Note the mucking with the register encode to compensate for the 0/1
972      // indexing issue mentioned in a comment in the reg_def sections
973      // for FPR registers many lines above here.
974
975      if( src_first != FPR1L_num ) {
976        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
977        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
978        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
979        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
980     } else {
981        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
982        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
983     }
984#ifndef PRODUCT
985    } else if( !do_size ) {
986      if( size != 0 ) st->print("\n\t");
987      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
988      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
989#endif
990    }
991    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
992  }
993
994  // Check for float store
995  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
996    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
997  }
998
999  // Check for float load
1000  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1001    int offset = ra_->reg2offset(src_first);
1002    const char *op_str;
1003    int op;
1004    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1005      op_str = "FLD_D";
1006      op = 0xDD;
1007    } else {                   // 32-bit load
1008      op_str = "FLD_S";
1009      op = 0xD9;
1010      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1011    }
1012    if( cbuf ) {
1013      emit_opcode  (*cbuf, op );
1014      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
1015      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1016      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1017#ifndef PRODUCT
1018    } else if( !do_size ) {
1019      if( size != 0 ) st->print("\n\t");
1020      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1021#endif
1022    }
1023    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1024    return size + 3+offset_size+2;
1025  }
1026
1027  // Check for xmm reg-reg copy
1028  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1029    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1030            (src_first+1 == src_second && dst_first+1 == dst_second),
1031            "no non-adjacent float-moves" );
1032    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1033  }
1034
1035  // Check for xmm reg-integer reg copy
1036  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1037    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1038            "no 64 bit float-integer reg moves" );
1039    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1040  }
1041
1042  // Check for xmm store
1043  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1044    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1045  }
1046
1047  // Check for float xmm load
1048  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1049    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1050  }
1051
1052  // Copy from float reg to xmm reg
1053  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1054    // copy to the top of stack from floating point reg
1055    // and use LEA to preserve flags
1056    if( cbuf ) {
1057      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1058      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1059      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1060      emit_d8(*cbuf,0xF8);
1061#ifndef PRODUCT
1062    } else if( !do_size ) {
1063      if( size != 0 ) st->print("\n\t");
1064      st->print("LEA    ESP,[ESP-8]");
1065#endif
1066    }
1067    size += 4;
1068
1069    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1070
1071    // Copy from the temp memory to the xmm reg.
1072    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1073
1074    if( cbuf ) {
1075      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1076      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1077      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1078      emit_d8(*cbuf,0x08);
1079#ifndef PRODUCT
1080    } else if( !do_size ) {
1081      if( size != 0 ) st->print("\n\t");
1082      st->print("LEA    ESP,[ESP+8]");
1083#endif
1084    }
1085    size += 4;
1086    return size;
1087  }
1088
1089  assert( size > 0, "missed a case" );
1090
1091  // --------------------------------------------------------------------
1092  // Check for second bits still needing moving.
1093  if( src_second == dst_second )
1094    return size;               // Self copy; no move
1095  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1096
1097  // Check for second word int-int move
1098  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1099    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1100
1101  // Check for second word integer store
1102  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1103    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1104
1105  // Check for second word integer load
1106  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1107    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1108
1109
1110  Unimplemented();
1111}
1112
1113#ifndef PRODUCT
1114void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1115  implementation( NULL, ra_, false, st );
1116}
1117#endif
1118
1119void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1120  implementation( &cbuf, ra_, false, NULL );
1121}
1122
1123uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1124  return implementation( NULL, ra_, true, NULL );
1125}
1126
1127
1128//=============================================================================
1129#ifndef PRODUCT
1130void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1131  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1132  int reg = ra_->get_reg_first(this);
1133  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1134}
1135#endif
1136
1137void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1138  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1139  int reg = ra_->get_encode(this);
1140  if( offset >= 128 ) {
1141    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1142    emit_rm(cbuf, 0x2, reg, 0x04);
1143    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1144    emit_d32(cbuf, offset);
1145  }
1146  else {
1147    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1148    emit_rm(cbuf, 0x1, reg, 0x04);
1149    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1150    emit_d8(cbuf, offset);
1151  }
1152}
1153
1154uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1155  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1156  if( offset >= 128 ) {
1157    return 7;
1158  }
1159  else {
1160    return 4;
1161  }
1162}
1163
1164//=============================================================================
1165
1166// emit call stub, compiled java to interpreter
1167void emit_java_to_interp(CodeBuffer &cbuf ) {
1168  // Stub is fixed up when the corresponding call is converted from calling
1169  // compiled code to calling interpreted code.
1170  // mov rbx,0
1171  // jmp -1
1172
1173  address mark = cbuf.insts_mark();  // get mark within main instrs section
1174
1175  // Note that the code buffer's insts_mark is always relative to insts.
1176  // That's why we must use the macroassembler to generate a stub.
1177  MacroAssembler _masm(&cbuf);
1178
1179  address base =
1180  __ start_a_stub(Compile::MAX_stubs_size);
1181  if (base == NULL)  return;  // CodeBuffer::expand failed
1182  // static stub relocation stores the instruction address of the call
1183  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
1184  // static stub relocation also tags the methodOop in the code-stream.
1185  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
1186  // This is recognized as unresolved by relocs/nativeInst/ic code
1187  __ jump(RuntimeAddress(__ pc()));
1188
1189  __ end_a_stub();
1190  // Update current stubs pointer and restore insts_end.
1191}
1192// size of call stub, compiled java to interpretor
1193uint size_java_to_interp() {
1194  return 10;  // movl; jmp
1195}
1196// relocation entries for call stub, compiled java to interpretor
1197uint reloc_java_to_interp() {
1198  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
1199}
1200
1201//=============================================================================
1202#ifndef PRODUCT
1203void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1204  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1205  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1206  st->print_cr("\tNOP");
1207  st->print_cr("\tNOP");
1208  if( !OptoBreakpoint )
1209    st->print_cr("\tNOP");
1210}
1211#endif
1212
1213void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1214  MacroAssembler masm(&cbuf);
1215#ifdef ASSERT
1216  uint insts_size = cbuf.insts_size();
1217#endif
1218  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1219  masm.jump_cc(Assembler::notEqual,
1220               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1221  /* WARNING these NOPs are critical so that verified entry point is properly
1222     aligned for patching by NativeJump::patch_verified_entry() */
1223  int nops_cnt = 2;
1224  if( !OptoBreakpoint ) // Leave space for int3
1225     nops_cnt += 1;
1226  masm.nop(nops_cnt);
1227
1228  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1229}
1230
1231uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1232  return OptoBreakpoint ? 11 : 12;
1233}
1234
1235
1236//=============================================================================
1237uint size_exception_handler() {
1238  // NativeCall instruction size is the same as NativeJump.
1239  // exception handler starts out as jump and can be patched to
1240  // a call be deoptimization.  (4932387)
1241  // Note that this value is also credited (in output.cpp) to
1242  // the size of the code section.
1243  return NativeJump::instruction_size;
1244}
1245
1246// Emit exception handler code.  Stuff framesize into a register
1247// and call a VM stub routine.
1248int emit_exception_handler(CodeBuffer& cbuf) {
1249
1250  // Note that the code buffer's insts_mark is always relative to insts.
1251  // That's why we must use the macroassembler to generate a handler.
1252  MacroAssembler _masm(&cbuf);
1253  address base =
1254  __ start_a_stub(size_exception_handler());
1255  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1256  int offset = __ offset();
1257  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->entry_point()));
1258  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
1259  __ end_a_stub();
1260  return offset;
1261}
1262
1263uint size_deopt_handler() {
1264  // NativeCall instruction size is the same as NativeJump.
1265  // exception handler starts out as jump and can be patched to
1266  // a call be deoptimization.  (4932387)
1267  // Note that this value is also credited (in output.cpp) to
1268  // the size of the code section.
1269  return 5 + NativeJump::instruction_size; // pushl(); jmp;
1270}
1271
1272// Emit deopt handler code.
1273int emit_deopt_handler(CodeBuffer& cbuf) {
1274
1275  // Note that the code buffer's insts_mark is always relative to insts.
1276  // That's why we must use the macroassembler to generate a handler.
1277  MacroAssembler _masm(&cbuf);
1278  address base =
1279  __ start_a_stub(size_exception_handler());
1280  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1281  int offset = __ offset();
1282  InternalAddress here(__ pc());
1283  __ pushptr(here.addr());
1284
1285  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1286  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1287  __ end_a_stub();
1288  return offset;
1289}
1290
1291
1292const bool Matcher::match_rule_supported(int opcode) {
1293  if (!has_match_rule(opcode))
1294    return false;
1295
1296  return true;  // Per default match rules are supported.
1297}
1298
1299int Matcher::regnum_to_fpu_offset(int regnum) {
1300  return regnum - 32; // The FP registers are in the second chunk
1301}
1302
1303// This is UltraSparc specific, true just means we have fast l2f conversion
1304const bool Matcher::convL2FSupported(void) {
1305  return true;
1306}
1307
1308// Vector width in bytes
1309const uint Matcher::vector_width_in_bytes(void) {
1310  return UseSSE >= 2 ? 8 : 0;
1311}
1312
1313// Vector ideal reg
1314const uint Matcher::vector_ideal_reg(void) {
1315  return Op_RegD;
1316}
1317
1318// Is this branch offset short enough that a short branch can be used?
1319//
1320// NOTE: If the platform does not provide any short branch variants, then
1321//       this method should return false for offset 0.
1322bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1323  // The passed offset is relative to address of the branch.
1324  // On 86 a branch displacement is calculated relative to address
1325  // of a next instruction.
1326  offset -= br_size;
1327
1328  // the short version of jmpConUCF2 contains multiple branches,
1329  // making the reach slightly less
1330  if (rule == jmpConUCF2_rule)
1331    return (-126 <= offset && offset <= 125);
1332  return (-128 <= offset && offset <= 127);
1333}
1334
1335const bool Matcher::isSimpleConstant64(jlong value) {
1336  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1337  return false;
1338}
1339
1340// The ecx parameter to rep stos for the ClearArray node is in dwords.
1341const bool Matcher::init_array_count_is_in_bytes = false;
1342
1343// Threshold size for cleararray.
1344const int Matcher::init_array_short_size = 8 * BytesPerLong;
1345
1346// Needs 2 CMOV's for longs.
1347const int Matcher::long_cmove_cost() { return 1; }
1348
1349// No CMOVF/CMOVD with SSE/SSE2
1350const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1351
1352// Should the Matcher clone shifts on addressing modes, expecting them to
1353// be subsumed into complex addressing expressions or compute them into
1354// registers?  True for Intel but false for most RISCs
1355const bool Matcher::clone_shift_expressions = true;
1356
1357// Do we need to mask the count passed to shift instructions or does
1358// the cpu only look at the lower 5/6 bits anyway?
1359const bool Matcher::need_masked_shift_count = false;
1360
1361bool Matcher::narrow_oop_use_complex_address() {
1362  ShouldNotCallThis();
1363  return true;
1364}
1365
1366
1367// Is it better to copy float constants, or load them directly from memory?
1368// Intel can load a float constant from a direct address, requiring no
1369// extra registers.  Most RISCs will have to materialize an address into a
1370// register first, so they would do better to copy the constant from stack.
1371const bool Matcher::rematerialize_float_constants = true;
1372
1373// If CPU can load and store mis-aligned doubles directly then no fixup is
1374// needed.  Else we split the double into 2 integer pieces and move it
1375// piece-by-piece.  Only happens when passing doubles into C code as the
1376// Java calling convention forces doubles to be aligned.
1377const bool Matcher::misaligned_doubles_ok = true;
1378
1379
1380void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1381  // Get the memory operand from the node
1382  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1383  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1384  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1385  uint opcnt     = 1;                 // First operand
1386  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1387  while( idx >= skipped+num_edges ) {
1388    skipped += num_edges;
1389    opcnt++;                          // Bump operand count
1390    assert( opcnt < numopnds, "Accessing non-existent operand" );
1391    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1392  }
1393
1394  MachOper *memory = node->_opnds[opcnt];
1395  MachOper *new_memory = NULL;
1396  switch (memory->opcode()) {
1397  case DIRECT:
1398  case INDOFFSET32X:
1399    // No transformation necessary.
1400    return;
1401  case INDIRECT:
1402    new_memory = new (C) indirect_win95_safeOper( );
1403    break;
1404  case INDOFFSET8:
1405    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1406    break;
1407  case INDOFFSET32:
1408    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1409    break;
1410  case INDINDEXOFFSET:
1411    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1412    break;
1413  case INDINDEXSCALE:
1414    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
1415    break;
1416  case INDINDEXSCALEOFFSET:
1417    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1418    break;
1419  case LOAD_LONG_INDIRECT:
1420  case LOAD_LONG_INDOFFSET32:
1421    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1422    return;
1423  default:
1424    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1425    return;
1426  }
1427  node->_opnds[opcnt] = new_memory;
1428}
1429
1430// Advertise here if the CPU requires explicit rounding operations
1431// to implement the UseStrictFP mode.
1432const bool Matcher::strict_fp_requires_explicit_rounding = true;
1433
1434// Are floats conerted to double when stored to stack during deoptimization?
1435// On x32 it is stored with convertion only when FPU is used for floats.
1436bool Matcher::float_in_double() { return (UseSSE == 0); }
1437
1438// Do ints take an entire long register or just half?
1439const bool Matcher::int_in_long = false;
1440
1441// Return whether or not this register is ever used as an argument.  This
1442// function is used on startup to build the trampoline stubs in generateOptoStub.
1443// Registers not mentioned will be killed by the VM call in the trampoline, and
1444// arguments in those registers not be available to the callee.
1445bool Matcher::can_be_java_arg( int reg ) {
1446  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1447  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1448  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1449  return false;
1450}
1451
1452bool Matcher::is_spillable_arg( int reg ) {
1453  return can_be_java_arg(reg);
1454}
1455
1456bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1457  // Use hardware integer DIV instruction when
1458  // it is faster than a code which use multiply.
1459  // Only when constant divisor fits into 32 bit
1460  // (min_jint is excluded to get only correct
1461  // positive 32 bit values from negative).
1462  return VM_Version::has_fast_idiv() &&
1463         (divisor == (int)divisor && divisor != min_jint);
1464}
1465
1466// Register for DIVI projection of divmodI
1467RegMask Matcher::divI_proj_mask() {
1468  return EAX_REG_mask();
1469}
1470
1471// Register for MODI projection of divmodI
1472RegMask Matcher::modI_proj_mask() {
1473  return EDX_REG_mask();
1474}
1475
1476// Register for DIVL projection of divmodL
1477RegMask Matcher::divL_proj_mask() {
1478  ShouldNotReachHere();
1479  return RegMask();
1480}
1481
1482// Register for MODL projection of divmodL
1483RegMask Matcher::modL_proj_mask() {
1484  ShouldNotReachHere();
1485  return RegMask();
1486}
1487
1488const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1489  return EBP_REG_mask();
1490}
1491
1492// Returns true if the high 32 bits of the value is known to be zero.
1493bool is_operand_hi32_zero(Node* n) {
1494  int opc = n->Opcode();
1495  if (opc == Op_LoadUI2L) {
1496    return true;
1497  }
1498  if (opc == Op_AndL) {
1499    Node* o2 = n->in(2);
1500    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1501      return true;
1502    }
1503  }
1504  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1505    return true;
1506  }
1507  return false;
1508}
1509
1510%}
1511
1512//----------ENCODING BLOCK-----------------------------------------------------
1513// This block specifies the encoding classes used by the compiler to output
1514// byte streams.  Encoding classes generate functions which are called by
1515// Machine Instruction Nodes in order to generate the bit encoding of the
1516// instruction.  Operands specify their base encoding interface with the
1517// interface keyword.  There are currently supported four interfaces,
1518// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1519// operand to generate a function which returns its register number when
1520// queried.   CONST_INTER causes an operand to generate a function which
1521// returns the value of the constant when queried.  MEMORY_INTER causes an
1522// operand to generate four functions which return the Base Register, the
1523// Index Register, the Scale Value, and the Offset Value of the operand when
1524// queried.  COND_INTER causes an operand to generate six functions which
1525// return the encoding code (ie - encoding bits for the instruction)
1526// associated with each basic boolean condition for a conditional instruction.
1527// Instructions specify two basic values for encoding.  They use the
1528// ins_encode keyword to specify their encoding class (which must be one of
1529// the class names specified in the encoding block), and they use the
1530// opcode keyword to specify, in order, their primary, secondary, and
1531// tertiary opcode.  Only the opcode sections which a particular instruction
1532// needs for encoding need to be specified.
1533encode %{
1534  // Build emit functions for each basic byte or larger field in the intel
1535  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1536  // code in the enc_class source block.  Emit functions will live in the
1537  // main source block for now.  In future, we can generalize this by
1538  // adding a syntax that specifies the sizes of fields in an order,
1539  // so that the adlc can build the emit functions automagically
1540
1541  // Emit primary opcode
1542  enc_class OpcP %{
1543    emit_opcode(cbuf, $primary);
1544  %}
1545
1546  // Emit secondary opcode
1547  enc_class OpcS %{
1548    emit_opcode(cbuf, $secondary);
1549  %}
1550
1551  // Emit opcode directly
1552  enc_class Opcode(immI d8) %{
1553    emit_opcode(cbuf, $d8$$constant);
1554  %}
1555
1556  enc_class SizePrefix %{
1557    emit_opcode(cbuf,0x66);
1558  %}
1559
1560  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
1561    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1562  %}
1563
1564  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
1565    emit_opcode(cbuf,$opcode$$constant);
1566    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1567  %}
1568
1569  enc_class mov_r32_imm0( eRegI dst ) %{
1570    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1571    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1572  %}
1573
1574  enc_class cdq_enc %{
1575    // Full implementation of Java idiv and irem; checks for
1576    // special case as described in JVM spec., p.243 & p.271.
1577    //
1578    //         normal case                           special case
1579    //
1580    // input : rax,: dividend                         min_int
1581    //         reg: divisor                          -1
1582    //
1583    // output: rax,: quotient  (= rax, idiv reg)       min_int
1584    //         rdx: remainder (= rax, irem reg)       0
1585    //
1586    //  Code sequnce:
1587    //
1588    //  81 F8 00 00 00 80    cmp         rax,80000000h
1589    //  0F 85 0B 00 00 00    jne         normal_case
1590    //  33 D2                xor         rdx,edx
1591    //  83 F9 FF             cmp         rcx,0FFh
1592    //  0F 84 03 00 00 00    je          done
1593    //                  normal_case:
1594    //  99                   cdq
1595    //  F7 F9                idiv        rax,ecx
1596    //                  done:
1597    //
1598    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1599    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1600    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1601    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1602    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1603    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1604    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1605    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1606    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1607    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1608    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1609    // normal_case:
1610    emit_opcode(cbuf,0x99);                                         // cdq
1611    // idiv (note: must be emitted by the user of this rule)
1612    // normal:
1613  %}
1614
1615  // Dense encoding for older common ops
1616  enc_class Opc_plus(immI opcode, eRegI reg) %{
1617    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1618  %}
1619
1620
1621  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1622  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1623    // Check for 8-bit immediate, and set sign extend bit in opcode
1624    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1625      emit_opcode(cbuf, $primary | 0x02);
1626    }
1627    else {                          // If 32-bit immediate
1628      emit_opcode(cbuf, $primary);
1629    }
1630  %}
1631
1632  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
1633    // Emit primary opcode and set sign-extend bit
1634    // Check for 8-bit immediate, and set sign extend bit in opcode
1635    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1636      emit_opcode(cbuf, $primary | 0x02);    }
1637    else {                          // If 32-bit immediate
1638      emit_opcode(cbuf, $primary);
1639    }
1640    // Emit r/m byte with secondary opcode, after primary opcode.
1641    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1642  %}
1643
1644  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1645    // Check for 8-bit immediate, and set sign extend bit in opcode
1646    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1647      $$$emit8$imm$$constant;
1648    }
1649    else {                          // If 32-bit immediate
1650      // Output immediate
1651      $$$emit32$imm$$constant;
1652    }
1653  %}
1654
1655  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1656    // Emit primary opcode and set sign-extend bit
1657    // Check for 8-bit immediate, and set sign extend bit in opcode
1658    int con = (int)$imm$$constant; // Throw away top bits
1659    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1660    // Emit r/m byte with secondary opcode, after primary opcode.
1661    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1662    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1663    else                               emit_d32(cbuf,con);
1664  %}
1665
1666  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1667    // Emit primary opcode and set sign-extend bit
1668    // Check for 8-bit immediate, and set sign extend bit in opcode
1669    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1670    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1671    // Emit r/m byte with tertiary opcode, after primary opcode.
1672    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1673    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1674    else                               emit_d32(cbuf,con);
1675  %}
1676
1677  enc_class OpcSReg (eRegI dst) %{    // BSWAP
1678    emit_cc(cbuf, $secondary, $dst$$reg );
1679  %}
1680
1681  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1682    int destlo = $dst$$reg;
1683    int desthi = HIGH_FROM_LOW(destlo);
1684    // bswap lo
1685    emit_opcode(cbuf, 0x0F);
1686    emit_cc(cbuf, 0xC8, destlo);
1687    // bswap hi
1688    emit_opcode(cbuf, 0x0F);
1689    emit_cc(cbuf, 0xC8, desthi);
1690    // xchg lo and hi
1691    emit_opcode(cbuf, 0x87);
1692    emit_rm(cbuf, 0x3, destlo, desthi);
1693  %}
1694
1695  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1696    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1697  %}
1698
1699  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1700    $$$emit8$primary;
1701    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1702  %}
1703
1704  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1705    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1706    emit_d8(cbuf, op >> 8 );
1707    emit_d8(cbuf, op & 255);
1708  %}
1709
1710  // emulate a CMOV with a conditional branch around a MOV
1711  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1712    // Invert sense of branch from sense of CMOV
1713    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1714    emit_d8( cbuf, $brOffs$$constant );
1715  %}
1716
1717  enc_class enc_PartialSubtypeCheck( ) %{
1718    Register Redi = as_Register(EDI_enc); // result register
1719    Register Reax = as_Register(EAX_enc); // super class
1720    Register Recx = as_Register(ECX_enc); // killed
1721    Register Resi = as_Register(ESI_enc); // sub class
1722    Label miss;
1723
1724    MacroAssembler _masm(&cbuf);
1725    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1726                                     NULL, &miss,
1727                                     /*set_cond_codes:*/ true);
1728    if ($primary) {
1729      __ xorptr(Redi, Redi);
1730    }
1731    __ bind(miss);
1732  %}
1733
1734  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1735    MacroAssembler masm(&cbuf);
1736    int start = masm.offset();
1737    if (UseSSE >= 2) {
1738      if (VerifyFPU) {
1739        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1740      }
1741    } else {
1742      // External c_calling_convention expects the FPU stack to be 'clean'.
1743      // Compiled code leaves it dirty.  Do cleanup now.
1744      masm.empty_FPU_stack();
1745    }
1746    if (sizeof_FFree_Float_Stack_All == -1) {
1747      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1748    } else {
1749      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1750    }
1751  %}
1752
1753  enc_class Verify_FPU_For_Leaf %{
1754    if( VerifyFPU ) {
1755      MacroAssembler masm(&cbuf);
1756      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1757    }
1758  %}
1759
1760  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1761    // This is the instruction starting address for relocation info.
1762    cbuf.set_insts_mark();
1763    $$$emit8$primary;
1764    // CALL directly to the runtime
1765    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1766                runtime_call_Relocation::spec(), RELOC_IMM32 );
1767
1768    if (UseSSE >= 2) {
1769      MacroAssembler _masm(&cbuf);
1770      BasicType rt = tf()->return_type();
1771
1772      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1773        // A C runtime call where the return value is unused.  In SSE2+
1774        // mode the result needs to be removed from the FPU stack.  It's
1775        // likely that this function call could be removed by the
1776        // optimizer if the C function is a pure function.
1777        __ ffree(0);
1778      } else if (rt == T_FLOAT) {
1779        __ lea(rsp, Address(rsp, -4));
1780        __ fstp_s(Address(rsp, 0));
1781        __ movflt(xmm0, Address(rsp, 0));
1782        __ lea(rsp, Address(rsp,  4));
1783      } else if (rt == T_DOUBLE) {
1784        __ lea(rsp, Address(rsp, -8));
1785        __ fstp_d(Address(rsp, 0));
1786        __ movdbl(xmm0, Address(rsp, 0));
1787        __ lea(rsp, Address(rsp,  8));
1788      }
1789    }
1790  %}
1791
1792
1793  enc_class pre_call_FPU %{
1794    // If method sets FPU control word restore it here
1795    debug_only(int off0 = cbuf.insts_size());
1796    if( Compile::current()->in_24_bit_fp_mode() ) {
1797      MacroAssembler masm(&cbuf);
1798      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1799    }
1800    debug_only(int off1 = cbuf.insts_size());
1801    assert(off1 - off0 == pre_call_FPU_size(), "correct size prediction");
1802  %}
1803
1804  enc_class post_call_FPU %{
1805    // If method sets FPU control word do it here also
1806    if( Compile::current()->in_24_bit_fp_mode() ) {
1807      MacroAssembler masm(&cbuf);
1808      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1809    }
1810  %}
1811
1812  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1813    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1814    // who we intended to call.
1815    cbuf.set_insts_mark();
1816    $$$emit8$primary;
1817    if ( !_method ) {
1818      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1819                     runtime_call_Relocation::spec(), RELOC_IMM32 );
1820    } else if(_optimized_virtual) {
1821      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1822                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1823    } else {
1824      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1825                     static_call_Relocation::spec(), RELOC_IMM32 );
1826    }
1827    if( _method ) {  // Emit stub for static call
1828      emit_java_to_interp(cbuf);
1829    }
1830  %}
1831
1832  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1833    // !!!!!
1834    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
1835    // emit_call_dynamic_prologue( cbuf );
1836    cbuf.set_insts_mark();
1837    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
1838    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
1839    address  virtual_call_oop_addr = cbuf.insts_mark();
1840    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1841    // who we intended to call.
1842    cbuf.set_insts_mark();
1843    $$$emit8$primary;
1844    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1845                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
1846  %}
1847
1848  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1849    int disp = in_bytes(methodOopDesc::from_compiled_offset());
1850    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1851
1852    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
1853    cbuf.set_insts_mark();
1854    $$$emit8$primary;
1855    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1856    emit_d8(cbuf, disp);             // Displacement
1857
1858  %}
1859
1860//   Following encoding is no longer used, but may be restored if calling
1861//   convention changes significantly.
1862//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1863//
1864//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1865//     // int ic_reg     = Matcher::inline_cache_reg();
1866//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1867//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1868//     // int imo_encode = Matcher::_regEncode[imo_reg];
1869//
1870//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1871//     // // so we load it immediately before the call
1872//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1873//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1874//
1875//     // xor rbp,ebp
1876//     emit_opcode(cbuf, 0x33);
1877//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1878//
1879//     // CALL to interpreter.
1880//     cbuf.set_insts_mark();
1881//     $$$emit8$primary;
1882//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1883//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1884//   %}
1885
1886  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1887    $$$emit8$primary;
1888    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1889    $$$emit8$shift$$constant;
1890  %}
1891
1892  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
1893    // Load immediate does not have a zero or sign extended version
1894    // for 8-bit immediates
1895    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1896    $$$emit32$src$$constant;
1897  %}
1898
1899  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
1900    // Load immediate does not have a zero or sign extended version
1901    // for 8-bit immediates
1902    emit_opcode(cbuf, $primary + $dst$$reg);
1903    $$$emit32$src$$constant;
1904  %}
1905
1906  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1907    // Load immediate does not have a zero or sign extended version
1908    // for 8-bit immediates
1909    int dst_enc = $dst$$reg;
1910    int src_con = $src$$constant & 0x0FFFFFFFFL;
1911    if (src_con == 0) {
1912      // xor dst, dst
1913      emit_opcode(cbuf, 0x33);
1914      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1915    } else {
1916      emit_opcode(cbuf, $primary + dst_enc);
1917      emit_d32(cbuf, src_con);
1918    }
1919  %}
1920
1921  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
1922    // Load immediate does not have a zero or sign extended version
1923    // for 8-bit immediates
1924    int dst_enc = $dst$$reg + 2;
1925    int src_con = ((julong)($src$$constant)) >> 32;
1926    if (src_con == 0) {
1927      // xor dst, dst
1928      emit_opcode(cbuf, 0x33);
1929      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1930    } else {
1931      emit_opcode(cbuf, $primary + dst_enc);
1932      emit_d32(cbuf, src_con);
1933    }
1934  %}
1935
1936
1937  // Encode a reg-reg copy.  If it is useless, then empty encoding.
1938  enc_class enc_Copy( eRegI dst, eRegI src ) %{
1939    encode_Copy( cbuf, $dst$$reg, $src$$reg );
1940  %}
1941
1942  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
1943    encode_Copy( cbuf, $dst$$reg, $src$$reg );
1944  %}
1945
1946  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
1947    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1948  %}
1949
1950  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
1951    $$$emit8$primary;
1952    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1953  %}
1954
1955  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
1956    $$$emit8$secondary;
1957    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
1958  %}
1959
1960  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
1961    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1962  %}
1963
1964  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
1965    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
1966  %}
1967
1968  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
1969    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
1970  %}
1971
1972  enc_class Con32 (immI src) %{    // Con32(storeImmI)
1973    // Output immediate
1974    $$$emit32$src$$constant;
1975  %}
1976
1977  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
1978    // Output Float immediate bits
1979    jfloat jf = $src$$constant;
1980    int    jf_as_bits = jint_cast( jf );
1981    emit_d32(cbuf, jf_as_bits);
1982  %}
1983
1984  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
1985    // Output Float immediate bits
1986    jfloat jf = $src$$constant;
1987    int    jf_as_bits = jint_cast( jf );
1988    emit_d32(cbuf, jf_as_bits);
1989  %}
1990
1991  enc_class Con16 (immI src) %{    // Con16(storeImmI)
1992    // Output immediate
1993    $$$emit16$src$$constant;
1994  %}
1995
1996  enc_class Con_d32(immI src) %{
1997    emit_d32(cbuf,$src$$constant);
1998  %}
1999
2000  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2001    // Output immediate memory reference
2002    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2003    emit_d32(cbuf, 0x00);
2004  %}
2005
2006  enc_class lock_prefix( ) %{
2007    if( os::is_MP() )
2008      emit_opcode(cbuf,0xF0);         // [Lock]
2009  %}
2010
2011  // Cmp-xchg long value.
2012  // Note: we need to swap rbx, and rcx before and after the
2013  //       cmpxchg8 instruction because the instruction uses
2014  //       rcx as the high order word of the new value to store but
2015  //       our register encoding uses rbx,.
2016  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2017
2018    // XCHG  rbx,ecx
2019    emit_opcode(cbuf,0x87);
2020    emit_opcode(cbuf,0xD9);
2021    // [Lock]
2022    if( os::is_MP() )
2023      emit_opcode(cbuf,0xF0);
2024    // CMPXCHG8 [Eptr]
2025    emit_opcode(cbuf,0x0F);
2026    emit_opcode(cbuf,0xC7);
2027    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2028    // XCHG  rbx,ecx
2029    emit_opcode(cbuf,0x87);
2030    emit_opcode(cbuf,0xD9);
2031  %}
2032
2033  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2034    // [Lock]
2035    if( os::is_MP() )
2036      emit_opcode(cbuf,0xF0);
2037
2038    // CMPXCHG [Eptr]
2039    emit_opcode(cbuf,0x0F);
2040    emit_opcode(cbuf,0xB1);
2041    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2042  %}
2043
2044  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2045    int res_encoding = $res$$reg;
2046
2047    // MOV  res,0
2048    emit_opcode( cbuf, 0xB8 + res_encoding);
2049    emit_d32( cbuf, 0 );
2050    // JNE,s  fail
2051    emit_opcode(cbuf,0x75);
2052    emit_d8(cbuf, 5 );
2053    // MOV  res,1
2054    emit_opcode( cbuf, 0xB8 + res_encoding);
2055    emit_d32( cbuf, 1 );
2056    // fail:
2057  %}
2058
2059  enc_class set_instruction_start( ) %{
2060    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2061  %}
2062
2063  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
2064    int reg_encoding = $ereg$$reg;
2065    int base  = $mem$$base;
2066    int index = $mem$$index;
2067    int scale = $mem$$scale;
2068    int displace = $mem$$disp;
2069    bool disp_is_oop = $mem->disp_is_oop();
2070    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2071  %}
2072
2073  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2074    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2075    int base  = $mem$$base;
2076    int index = $mem$$index;
2077    int scale = $mem$$scale;
2078    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2079    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
2080    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
2081  %}
2082
2083  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2084    int r1, r2;
2085    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2086    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2087    emit_opcode(cbuf,0x0F);
2088    emit_opcode(cbuf,$tertiary);
2089    emit_rm(cbuf, 0x3, r1, r2);
2090    emit_d8(cbuf,$cnt$$constant);
2091    emit_d8(cbuf,$primary);
2092    emit_rm(cbuf, 0x3, $secondary, r1);
2093    emit_d8(cbuf,$cnt$$constant);
2094  %}
2095
2096  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2097    emit_opcode( cbuf, 0x8B ); // Move
2098    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2099    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2100      emit_d8(cbuf,$primary);
2101      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2102      emit_d8(cbuf,$cnt$$constant-32);
2103    }
2104    emit_d8(cbuf,$primary);
2105    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2106    emit_d8(cbuf,31);
2107  %}
2108
2109  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2110    int r1, r2;
2111    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2112    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2113
2114    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2115    emit_rm(cbuf, 0x3, r1, r2);
2116    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2117      emit_opcode(cbuf,$primary);
2118      emit_rm(cbuf, 0x3, $secondary, r1);
2119      emit_d8(cbuf,$cnt$$constant-32);
2120    }
2121    emit_opcode(cbuf,0x33);  // XOR r2,r2
2122    emit_rm(cbuf, 0x3, r2, r2);
2123  %}
2124
2125  // Clone of RegMem but accepts an extra parameter to access each
2126  // half of a double in memory; it never needs relocation info.
2127  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2128    emit_opcode(cbuf,$opcode$$constant);
2129    int reg_encoding = $rm_reg$$reg;
2130    int base     = $mem$$base;
2131    int index    = $mem$$index;
2132    int scale    = $mem$$scale;
2133    int displace = $mem$$disp + $disp_for_half$$constant;
2134    bool disp_is_oop = false;
2135    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2136  %}
2137
2138  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2139  //
2140  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2141  // and it never needs relocation information.
2142  // Frequently used to move data between FPU's Stack Top and memory.
2143  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2144    int rm_byte_opcode = $rm_opcode$$constant;
2145    int base     = $mem$$base;
2146    int index    = $mem$$index;
2147    int scale    = $mem$$scale;
2148    int displace = $mem$$disp;
2149    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2150    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
2151  %}
2152
2153  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2154    int rm_byte_opcode = $rm_opcode$$constant;
2155    int base     = $mem$$base;
2156    int index    = $mem$$index;
2157    int scale    = $mem$$scale;
2158    int displace = $mem$$disp;
2159    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2160    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
2161  %}
2162
2163  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
2164    int reg_encoding = $dst$$reg;
2165    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2166    int index        = 0x04;            // 0x04 indicates no index
2167    int scale        = 0x00;            // 0x00 indicates no scale
2168    int displace     = $src1$$constant; // 0x00 indicates no displacement
2169    bool disp_is_oop = false;
2170    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2171  %}
2172
2173  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
2174    // Compare dst,src
2175    emit_opcode(cbuf,0x3B);
2176    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2177    // jmp dst < src around move
2178    emit_opcode(cbuf,0x7C);
2179    emit_d8(cbuf,2);
2180    // move dst,src
2181    emit_opcode(cbuf,0x8B);
2182    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2183  %}
2184
2185  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
2186    // Compare dst,src
2187    emit_opcode(cbuf,0x3B);
2188    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2189    // jmp dst > src around move
2190    emit_opcode(cbuf,0x7F);
2191    emit_d8(cbuf,2);
2192    // move dst,src
2193    emit_opcode(cbuf,0x8B);
2194    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2195  %}
2196
2197  enc_class enc_FPR_store(memory mem, regDPR src) %{
2198    // If src is FPR1, we can just FST to store it.
2199    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2200    int reg_encoding = 0x2; // Just store
2201    int base  = $mem$$base;
2202    int index = $mem$$index;
2203    int scale = $mem$$scale;
2204    int displace = $mem$$disp;
2205    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2206    if( $src$$reg != FPR1L_enc ) {
2207      reg_encoding = 0x3;  // Store & pop
2208      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2209      emit_d8( cbuf, 0xC0-1+$src$$reg );
2210    }
2211    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2212    emit_opcode(cbuf,$primary);
2213    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2214  %}
2215
2216  enc_class neg_reg(eRegI dst) %{
2217    // NEG $dst
2218    emit_opcode(cbuf,0xF7);
2219    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2220  %}
2221
2222  enc_class setLT_reg(eCXRegI dst) %{
2223    // SETLT $dst
2224    emit_opcode(cbuf,0x0F);
2225    emit_opcode(cbuf,0x9C);
2226    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2227  %}
2228
2229  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2230    int tmpReg = $tmp$$reg;
2231
2232    // SUB $p,$q
2233    emit_opcode(cbuf,0x2B);
2234    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2235    // SBB $tmp,$tmp
2236    emit_opcode(cbuf,0x1B);
2237    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2238    // AND $tmp,$y
2239    emit_opcode(cbuf,0x23);
2240    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2241    // ADD $p,$tmp
2242    emit_opcode(cbuf,0x03);
2243    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2244  %}
2245
2246  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
2247    int tmpReg = $tmp$$reg;
2248
2249    // SUB $p,$q
2250    emit_opcode(cbuf,0x2B);
2251    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2252    // SBB $tmp,$tmp
2253    emit_opcode(cbuf,0x1B);
2254    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2255    // AND $tmp,$y
2256    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2257    emit_opcode(cbuf,0x23);
2258    int reg_encoding = tmpReg;
2259    int base  = $mem$$base;
2260    int index = $mem$$index;
2261    int scale = $mem$$scale;
2262    int displace = $mem$$disp;
2263    bool disp_is_oop = $mem->disp_is_oop();
2264    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2265    // ADD $p,$tmp
2266    emit_opcode(cbuf,0x03);
2267    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2268  %}
2269
2270  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2271    // TEST shift,32
2272    emit_opcode(cbuf,0xF7);
2273    emit_rm(cbuf, 0x3, 0, ECX_enc);
2274    emit_d32(cbuf,0x20);
2275    // JEQ,s small
2276    emit_opcode(cbuf, 0x74);
2277    emit_d8(cbuf, 0x04);
2278    // MOV    $dst.hi,$dst.lo
2279    emit_opcode( cbuf, 0x8B );
2280    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2281    // CLR    $dst.lo
2282    emit_opcode(cbuf, 0x33);
2283    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2284// small:
2285    // SHLD   $dst.hi,$dst.lo,$shift
2286    emit_opcode(cbuf,0x0F);
2287    emit_opcode(cbuf,0xA5);
2288    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2289    // SHL    $dst.lo,$shift"
2290    emit_opcode(cbuf,0xD3);
2291    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2292  %}
2293
2294  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2295    // TEST shift,32
2296    emit_opcode(cbuf,0xF7);
2297    emit_rm(cbuf, 0x3, 0, ECX_enc);
2298    emit_d32(cbuf,0x20);
2299    // JEQ,s small
2300    emit_opcode(cbuf, 0x74);
2301    emit_d8(cbuf, 0x04);
2302    // MOV    $dst.lo,$dst.hi
2303    emit_opcode( cbuf, 0x8B );
2304    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2305    // CLR    $dst.hi
2306    emit_opcode(cbuf, 0x33);
2307    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2308// small:
2309    // SHRD   $dst.lo,$dst.hi,$shift
2310    emit_opcode(cbuf,0x0F);
2311    emit_opcode(cbuf,0xAD);
2312    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2313    // SHR    $dst.hi,$shift"
2314    emit_opcode(cbuf,0xD3);
2315    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2316  %}
2317
2318  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2319    // TEST shift,32
2320    emit_opcode(cbuf,0xF7);
2321    emit_rm(cbuf, 0x3, 0, ECX_enc);
2322    emit_d32(cbuf,0x20);
2323    // JEQ,s small
2324    emit_opcode(cbuf, 0x74);
2325    emit_d8(cbuf, 0x05);
2326    // MOV    $dst.lo,$dst.hi
2327    emit_opcode( cbuf, 0x8B );
2328    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2329    // SAR    $dst.hi,31
2330    emit_opcode(cbuf, 0xC1);
2331    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2332    emit_d8(cbuf, 0x1F );
2333// small:
2334    // SHRD   $dst.lo,$dst.hi,$shift
2335    emit_opcode(cbuf,0x0F);
2336    emit_opcode(cbuf,0xAD);
2337    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2338    // SAR    $dst.hi,$shift"
2339    emit_opcode(cbuf,0xD3);
2340    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2341  %}
2342
2343
2344  // ----------------- Encodings for floating point unit -----------------
2345  // May leave result in FPU-TOS or FPU reg depending on opcodes
2346  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2347    $$$emit8$primary;
2348    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2349  %}
2350
2351  // Pop argument in FPR0 with FSTP ST(0)
2352  enc_class PopFPU() %{
2353    emit_opcode( cbuf, 0xDD );
2354    emit_d8( cbuf, 0xD8 );
2355  %}
2356
2357  // !!!!! equivalent to Pop_Reg_F
2358  enc_class Pop_Reg_DPR( regDPR dst ) %{
2359    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2360    emit_d8( cbuf, 0xD8+$dst$$reg );
2361  %}
2362
2363  enc_class Push_Reg_DPR( regDPR dst ) %{
2364    emit_opcode( cbuf, 0xD9 );
2365    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2366  %}
2367
2368  enc_class strictfp_bias1( regDPR dst ) %{
2369    emit_opcode( cbuf, 0xDB );           // FLD m80real
2370    emit_opcode( cbuf, 0x2D );
2371    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2372    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2373    emit_opcode( cbuf, 0xC8+$dst$$reg );
2374  %}
2375
2376  enc_class strictfp_bias2( regDPR dst ) %{
2377    emit_opcode( cbuf, 0xDB );           // FLD m80real
2378    emit_opcode( cbuf, 0x2D );
2379    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2380    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2381    emit_opcode( cbuf, 0xC8+$dst$$reg );
2382  %}
2383
2384  // Special case for moving an integer register to a stack slot.
2385  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2386    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2387  %}
2388
2389  // Special case for moving a register to a stack slot.
2390  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
2391    // Opcode already emitted
2392    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2393    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2394    emit_d32(cbuf, $dst$$disp);   // Displacement
2395  %}
2396
2397  // Push the integer in stackSlot 'src' onto FP-stack
2398  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2399    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2400  %}
2401
2402  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2403  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2404    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2405  %}
2406
2407  // Same as Pop_Mem_F except for opcode
2408  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2409  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2410    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2411  %}
2412
2413  enc_class Pop_Reg_FPR( regFPR dst ) %{
2414    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2415    emit_d8( cbuf, 0xD8+$dst$$reg );
2416  %}
2417
2418  enc_class Push_Reg_FPR( regFPR dst ) %{
2419    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2420    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2421  %}
2422
2423  // Push FPU's float to a stack-slot, and pop FPU-stack
2424  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2425    int pop = 0x02;
2426    if ($src$$reg != FPR1L_enc) {
2427      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2428      emit_d8( cbuf, 0xC0-1+$src$$reg );
2429      pop = 0x03;
2430    }
2431    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2432  %}
2433
2434  // Push FPU's double to a stack-slot, and pop FPU-stack
2435  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2436    int pop = 0x02;
2437    if ($src$$reg != FPR1L_enc) {
2438      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2439      emit_d8( cbuf, 0xC0-1+$src$$reg );
2440      pop = 0x03;
2441    }
2442    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2443  %}
2444
2445  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2446  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2447    int pop = 0xD0 - 1; // -1 since we skip FLD
2448    if ($src$$reg != FPR1L_enc) {
2449      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2450      emit_d8( cbuf, 0xC0-1+$src$$reg );
2451      pop = 0xD8;
2452    }
2453    emit_opcode( cbuf, 0xDD );
2454    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2455  %}
2456
2457
2458  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2459    // load dst in FPR0
2460    emit_opcode( cbuf, 0xD9 );
2461    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2462    if ($src$$reg != FPR1L_enc) {
2463      // fincstp
2464      emit_opcode (cbuf, 0xD9);
2465      emit_opcode (cbuf, 0xF7);
2466      // swap src with FPR1:
2467      // FXCH FPR1 with src
2468      emit_opcode(cbuf, 0xD9);
2469      emit_d8(cbuf, 0xC8-1+$src$$reg );
2470      // fdecstp
2471      emit_opcode (cbuf, 0xD9);
2472      emit_opcode (cbuf, 0xF6);
2473    }
2474  %}
2475
2476  enc_class Push_ModD_encoding(regD src0, regD src1) %{
2477    MacroAssembler _masm(&cbuf);
2478    __ subptr(rsp, 8);
2479    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2480    __ fld_d(Address(rsp, 0));
2481    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2482    __ fld_d(Address(rsp, 0));
2483  %}
2484
2485  enc_class Push_ModF_encoding(regF src0, regF src1) %{
2486    MacroAssembler _masm(&cbuf);
2487    __ subptr(rsp, 4);
2488    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2489    __ fld_s(Address(rsp, 0));
2490    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2491    __ fld_s(Address(rsp, 0));
2492  %}
2493
2494  enc_class Push_ResultD(regD dst) %{
2495    MacroAssembler _masm(&cbuf);
2496    __ fstp_d(Address(rsp, 0));
2497    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2498    __ addptr(rsp, 8);
2499  %}
2500
2501  enc_class Push_ResultF(regF dst, immI d8) %{
2502    MacroAssembler _masm(&cbuf);
2503    __ fstp_s(Address(rsp, 0));
2504    __ movflt($dst$$XMMRegister, Address(rsp, 0));
2505    __ addptr(rsp, $d8$$constant);
2506  %}
2507
2508  enc_class Push_SrcD(regD src) %{
2509    MacroAssembler _masm(&cbuf);
2510    __ subptr(rsp, 8);
2511    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2512    __ fld_d(Address(rsp, 0));
2513  %}
2514
2515  enc_class push_stack_temp_qword() %{
2516    MacroAssembler _masm(&cbuf);
2517    __ subptr(rsp, 8);
2518  %}
2519
2520  enc_class pop_stack_temp_qword() %{
2521    MacroAssembler _masm(&cbuf);
2522    __ addptr(rsp, 8);
2523  %}
2524
2525  enc_class push_xmm_to_fpr1(regD src) %{
2526    MacroAssembler _masm(&cbuf);
2527    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2528    __ fld_d(Address(rsp, 0));
2529  %}
2530
2531  // Compute X^Y using Intel's fast hardware instructions, if possible.
2532  // Otherwise return a NaN.
2533  enc_class pow_exp_core_encoding %{
2534    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
2535    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
2536    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
2537    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
2538    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
2539    emit_opcode(cbuf,0x1C);
2540    emit_d8(cbuf,0x24);
2541    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
2542    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
2543    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
2544    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
2545    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
2546    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
2547    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
2548    emit_d32(cbuf,0xFFFFF800);
2549    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
2550    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
2551    emit_d32(cbuf,1023);
2552    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
2553    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
2554    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
2555    emit_rm(cbuf,0x3,0x4,EAX_enc);
2556    emit_d8(cbuf,20);
2557    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
2558    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
2559    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
2560    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
2561    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
2562    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
2563    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
2564    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
2565    emit_d32(cbuf,0);
2566    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
2567    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
2568  %}
2569
2570  enc_class Push_Result_Mod_DPR( regDPR src) %{
2571    if ($src$$reg != FPR1L_enc) {
2572      // fincstp
2573      emit_opcode (cbuf, 0xD9);
2574      emit_opcode (cbuf, 0xF7);
2575      // FXCH FPR1 with src
2576      emit_opcode(cbuf, 0xD9);
2577      emit_d8(cbuf, 0xC8-1+$src$$reg );
2578      // fdecstp
2579      emit_opcode (cbuf, 0xD9);
2580      emit_opcode (cbuf, 0xF6);
2581    }
2582    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2583    // // FSTP   FPR$dst$$reg
2584    // emit_opcode( cbuf, 0xDD );
2585    // emit_d8( cbuf, 0xD8+$dst$$reg );
2586  %}
2587
2588  enc_class fnstsw_sahf_skip_parity() %{
2589    // fnstsw ax
2590    emit_opcode( cbuf, 0xDF );
2591    emit_opcode( cbuf, 0xE0 );
2592    // sahf
2593    emit_opcode( cbuf, 0x9E );
2594    // jnp  ::skip
2595    emit_opcode( cbuf, 0x7B );
2596    emit_opcode( cbuf, 0x05 );
2597  %}
2598
2599  enc_class emitModDPR() %{
2600    // fprem must be iterative
2601    // :: loop
2602    // fprem
2603    emit_opcode( cbuf, 0xD9 );
2604    emit_opcode( cbuf, 0xF8 );
2605    // wait
2606    emit_opcode( cbuf, 0x9b );
2607    // fnstsw ax
2608    emit_opcode( cbuf, 0xDF );
2609    emit_opcode( cbuf, 0xE0 );
2610    // sahf
2611    emit_opcode( cbuf, 0x9E );
2612    // jp  ::loop
2613    emit_opcode( cbuf, 0x0F );
2614    emit_opcode( cbuf, 0x8A );
2615    emit_opcode( cbuf, 0xF4 );
2616    emit_opcode( cbuf, 0xFF );
2617    emit_opcode( cbuf, 0xFF );
2618    emit_opcode( cbuf, 0xFF );
2619  %}
2620
2621  enc_class fpu_flags() %{
2622    // fnstsw_ax
2623    emit_opcode( cbuf, 0xDF);
2624    emit_opcode( cbuf, 0xE0);
2625    // test ax,0x0400
2626    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2627    emit_opcode( cbuf, 0xA9 );
2628    emit_d16   ( cbuf, 0x0400 );
2629    // // // This sequence works, but stalls for 12-16 cycles on PPro
2630    // // test rax,0x0400
2631    // emit_opcode( cbuf, 0xA9 );
2632    // emit_d32   ( cbuf, 0x00000400 );
2633    //
2634    // jz exit (no unordered comparison)
2635    emit_opcode( cbuf, 0x74 );
2636    emit_d8    ( cbuf, 0x02 );
2637    // mov ah,1 - treat as LT case (set carry flag)
2638    emit_opcode( cbuf, 0xB4 );
2639    emit_d8    ( cbuf, 0x01 );
2640    // sahf
2641    emit_opcode( cbuf, 0x9E);
2642  %}
2643
2644  enc_class cmpF_P6_fixup() %{
2645    // Fixup the integer flags in case comparison involved a NaN
2646    //
2647    // JNP exit (no unordered comparison, P-flag is set by NaN)
2648    emit_opcode( cbuf, 0x7B );
2649    emit_d8    ( cbuf, 0x03 );
2650    // MOV AH,1 - treat as LT case (set carry flag)
2651    emit_opcode( cbuf, 0xB4 );
2652    emit_d8    ( cbuf, 0x01 );
2653    // SAHF
2654    emit_opcode( cbuf, 0x9E);
2655    // NOP     // target for branch to avoid branch to branch
2656    emit_opcode( cbuf, 0x90);
2657  %}
2658
2659//     fnstsw_ax();
2660//     sahf();
2661//     movl(dst, nan_result);
2662//     jcc(Assembler::parity, exit);
2663//     movl(dst, less_result);
2664//     jcc(Assembler::below, exit);
2665//     movl(dst, equal_result);
2666//     jcc(Assembler::equal, exit);
2667//     movl(dst, greater_result);
2668
2669// less_result     =  1;
2670// greater_result  = -1;
2671// equal_result    = 0;
2672// nan_result      = -1;
2673
2674  enc_class CmpF_Result(eRegI dst) %{
2675    // fnstsw_ax();
2676    emit_opcode( cbuf, 0xDF);
2677    emit_opcode( cbuf, 0xE0);
2678    // sahf
2679    emit_opcode( cbuf, 0x9E);
2680    // movl(dst, nan_result);
2681    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2682    emit_d32( cbuf, -1 );
2683    // jcc(Assembler::parity, exit);
2684    emit_opcode( cbuf, 0x7A );
2685    emit_d8    ( cbuf, 0x13 );
2686    // movl(dst, less_result);
2687    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2688    emit_d32( cbuf, -1 );
2689    // jcc(Assembler::below, exit);
2690    emit_opcode( cbuf, 0x72 );
2691    emit_d8    ( cbuf, 0x0C );
2692    // movl(dst, equal_result);
2693    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2694    emit_d32( cbuf, 0 );
2695    // jcc(Assembler::equal, exit);
2696    emit_opcode( cbuf, 0x74 );
2697    emit_d8    ( cbuf, 0x05 );
2698    // movl(dst, greater_result);
2699    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2700    emit_d32( cbuf, 1 );
2701  %}
2702
2703
2704  // Compare the longs and set flags
2705  // BROKEN!  Do Not use as-is
2706  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2707    // CMP    $src1.hi,$src2.hi
2708    emit_opcode( cbuf, 0x3B );
2709    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2710    // JNE,s  done
2711    emit_opcode(cbuf,0x75);
2712    emit_d8(cbuf, 2 );
2713    // CMP    $src1.lo,$src2.lo
2714    emit_opcode( cbuf, 0x3B );
2715    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2716// done:
2717  %}
2718
2719  enc_class convert_int_long( regL dst, eRegI src ) %{
2720    // mov $dst.lo,$src
2721    int dst_encoding = $dst$$reg;
2722    int src_encoding = $src$$reg;
2723    encode_Copy( cbuf, dst_encoding  , src_encoding );
2724    // mov $dst.hi,$src
2725    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2726    // sar $dst.hi,31
2727    emit_opcode( cbuf, 0xC1 );
2728    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2729    emit_d8(cbuf, 0x1F );
2730  %}
2731
2732  enc_class convert_long_double( eRegL src ) %{
2733    // push $src.hi
2734    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2735    // push $src.lo
2736    emit_opcode(cbuf, 0x50+$src$$reg  );
2737    // fild 64-bits at [SP]
2738    emit_opcode(cbuf,0xdf);
2739    emit_d8(cbuf, 0x6C);
2740    emit_d8(cbuf, 0x24);
2741    emit_d8(cbuf, 0x00);
2742    // pop stack
2743    emit_opcode(cbuf, 0x83); // add  SP, #8
2744    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2745    emit_d8(cbuf, 0x8);
2746  %}
2747
2748  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2749    // IMUL   EDX:EAX,$src1
2750    emit_opcode( cbuf, 0xF7 );
2751    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2752    // SAR    EDX,$cnt-32
2753    int shift_count = ((int)$cnt$$constant) - 32;
2754    if (shift_count > 0) {
2755      emit_opcode(cbuf, 0xC1);
2756      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2757      emit_d8(cbuf, shift_count);
2758    }
2759  %}
2760
2761  // this version doesn't have add sp, 8
2762  enc_class convert_long_double2( eRegL src ) %{
2763    // push $src.hi
2764    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2765    // push $src.lo
2766    emit_opcode(cbuf, 0x50+$src$$reg  );
2767    // fild 64-bits at [SP]
2768    emit_opcode(cbuf,0xdf);
2769    emit_d8(cbuf, 0x6C);
2770    emit_d8(cbuf, 0x24);
2771    emit_d8(cbuf, 0x00);
2772  %}
2773
2774  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2775    // Basic idea: long = (long)int * (long)int
2776    // IMUL EDX:EAX, src
2777    emit_opcode( cbuf, 0xF7 );
2778    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2779  %}
2780
2781  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2782    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2783    // MUL EDX:EAX, src
2784    emit_opcode( cbuf, 0xF7 );
2785    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2786  %}
2787
2788  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
2789    // Basic idea: lo(result) = lo(x_lo * y_lo)
2790    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2791    // MOV    $tmp,$src.lo
2792    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2793    // IMUL   $tmp,EDX
2794    emit_opcode( cbuf, 0x0F );
2795    emit_opcode( cbuf, 0xAF );
2796    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2797    // MOV    EDX,$src.hi
2798    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2799    // IMUL   EDX,EAX
2800    emit_opcode( cbuf, 0x0F );
2801    emit_opcode( cbuf, 0xAF );
2802    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2803    // ADD    $tmp,EDX
2804    emit_opcode( cbuf, 0x03 );
2805    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2806    // MUL   EDX:EAX,$src.lo
2807    emit_opcode( cbuf, 0xF7 );
2808    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2809    // ADD    EDX,ESI
2810    emit_opcode( cbuf, 0x03 );
2811    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2812  %}
2813
2814  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
2815    // Basic idea: lo(result) = lo(src * y_lo)
2816    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2817    // IMUL   $tmp,EDX,$src
2818    emit_opcode( cbuf, 0x6B );
2819    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2820    emit_d8( cbuf, (int)$src$$constant );
2821    // MOV    EDX,$src
2822    emit_opcode(cbuf, 0xB8 + EDX_enc);
2823    emit_d32( cbuf, (int)$src$$constant );
2824    // MUL   EDX:EAX,EDX
2825    emit_opcode( cbuf, 0xF7 );
2826    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2827    // ADD    EDX,ESI
2828    emit_opcode( cbuf, 0x03 );
2829    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2830  %}
2831
2832  enc_class long_div( eRegL src1, eRegL src2 ) %{
2833    // PUSH src1.hi
2834    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2835    // PUSH src1.lo
2836    emit_opcode(cbuf,               0x50+$src1$$reg  );
2837    // PUSH src2.hi
2838    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2839    // PUSH src2.lo
2840    emit_opcode(cbuf,               0x50+$src2$$reg  );
2841    // CALL directly to the runtime
2842    cbuf.set_insts_mark();
2843    emit_opcode(cbuf,0xE8);       // Call into runtime
2844    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2845    // Restore stack
2846    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2847    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2848    emit_d8(cbuf, 4*4);
2849  %}
2850
2851  enc_class long_mod( eRegL src1, eRegL src2 ) %{
2852    // PUSH src1.hi
2853    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2854    // PUSH src1.lo
2855    emit_opcode(cbuf,               0x50+$src1$$reg  );
2856    // PUSH src2.hi
2857    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2858    // PUSH src2.lo
2859    emit_opcode(cbuf,               0x50+$src2$$reg  );
2860    // CALL directly to the runtime
2861    cbuf.set_insts_mark();
2862    emit_opcode(cbuf,0xE8);       // Call into runtime
2863    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2864    // Restore stack
2865    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2866    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2867    emit_d8(cbuf, 4*4);
2868  %}
2869
2870  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
2871    // MOV   $tmp,$src.lo
2872    emit_opcode(cbuf, 0x8B);
2873    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2874    // OR    $tmp,$src.hi
2875    emit_opcode(cbuf, 0x0B);
2876    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2877  %}
2878
2879  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2880    // CMP    $src1.lo,$src2.lo
2881    emit_opcode( cbuf, 0x3B );
2882    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2883    // JNE,s  skip
2884    emit_cc(cbuf, 0x70, 0x5);
2885    emit_d8(cbuf,2);
2886    // CMP    $src1.hi,$src2.hi
2887    emit_opcode( cbuf, 0x3B );
2888    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2889  %}
2890
2891  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
2892    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2893    emit_opcode( cbuf, 0x3B );
2894    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2895    // MOV    $tmp,$src1.hi
2896    emit_opcode( cbuf, 0x8B );
2897    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2898    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2899    emit_opcode( cbuf, 0x1B );
2900    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2901  %}
2902
2903  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
2904    // XOR    $tmp,$tmp
2905    emit_opcode(cbuf,0x33);  // XOR
2906    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2907    // CMP    $tmp,$src.lo
2908    emit_opcode( cbuf, 0x3B );
2909    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2910    // SBB    $tmp,$src.hi
2911    emit_opcode( cbuf, 0x1B );
2912    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2913  %}
2914
2915 // Sniff, sniff... smells like Gnu Superoptimizer
2916  enc_class neg_long( eRegL dst ) %{
2917    emit_opcode(cbuf,0xF7);    // NEG hi
2918    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2919    emit_opcode(cbuf,0xF7);    // NEG lo
2920    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2921    emit_opcode(cbuf,0x83);    // SBB hi,0
2922    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2923    emit_d8    (cbuf,0 );
2924  %}
2925
2926
2927  // Because the transitions from emitted code to the runtime
2928  // monitorenter/exit helper stubs are so slow it's critical that
2929  // we inline both the stack-locking fast-path and the inflated fast path.
2930  //
2931  // See also: cmpFastLock and cmpFastUnlock.
2932  //
2933  // What follows is a specialized inline transliteration of the code
2934  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
2935  // another option would be to emit TrySlowEnter and TrySlowExit methods
2936  // at startup-time.  These methods would accept arguments as
2937  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
2938  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
2939  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
2940  // In practice, however, the # of lock sites is bounded and is usually small.
2941  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
2942  // if the processor uses simple bimodal branch predictors keyed by EIP
2943  // Since the helper routines would be called from multiple synchronization
2944  // sites.
2945  //
2946  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
2947  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
2948  // to those specialized methods.  That'd give us a mostly platform-independent
2949  // implementation that the JITs could optimize and inline at their pleasure.
2950  // Done correctly, the only time we'd need to cross to native could would be
2951  // to park() or unpark() threads.  We'd also need a few more unsafe operators
2952  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
2953  // (b) explicit barriers or fence operations.
2954  //
2955  // TODO:
2956  //
2957  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
2958  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
2959  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
2960  //    the lock operators would typically be faster than reifying Self.
2961  //
2962  // *  Ideally I'd define the primitives as:
2963  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
2964  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
2965  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
2966  //    Instead, we're stuck with a rather awkward and brittle register assignments below.
2967  //    Furthermore the register assignments are overconstrained, possibly resulting in
2968  //    sub-optimal code near the synchronization site.
2969  //
2970  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
2971  //    Alternately, use a better sp-proximity test.
2972  //
2973  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
2974  //    Either one is sufficient to uniquely identify a thread.
2975  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
2976  //
2977  // *  Intrinsify notify() and notifyAll() for the common cases where the
2978  //    object is locked by the calling thread but the waitlist is empty.
2979  //    avoid the expensive JNI call to JVM_Notify() and JVM_NotifyAll().
2980  //
2981  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
2982  //    But beware of excessive branch density on AMD Opterons.
2983  //
2984  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
2985  //    or failure of the fast-path.  If the fast-path fails then we pass
2986  //    control to the slow-path, typically in C.  In Fast_Lock and
2987  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
2988  //    will emit a conditional branch immediately after the node.
2989  //    So we have branches to branches and lots of ICC.ZF games.
2990  //    Instead, it might be better to have C2 pass a "FailureLabel"
2991  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
2992  //    will drop through the node.  ICC.ZF is undefined at exit.
2993  //    In the case of failure, the node will branch directly to the
2994  //    FailureLabel
2995
2996
2997  // obj: object to lock
2998  // box: on-stack box address (displaced header location) - KILLED
2999  // rax,: tmp -- KILLED
3000  // scr: tmp -- KILLED
3001  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
3002
3003    Register objReg = as_Register($obj$$reg);
3004    Register boxReg = as_Register($box$$reg);
3005    Register tmpReg = as_Register($tmp$$reg);
3006    Register scrReg = as_Register($scr$$reg);
3007
3008    // Ensure the register assignents are disjoint
3009    guarantee (objReg != boxReg, "") ;
3010    guarantee (objReg != tmpReg, "") ;
3011    guarantee (objReg != scrReg, "") ;
3012    guarantee (boxReg != tmpReg, "") ;
3013    guarantee (boxReg != scrReg, "") ;
3014    guarantee (tmpReg == as_Register(EAX_enc), "") ;
3015
3016    MacroAssembler masm(&cbuf);
3017
3018    if (_counters != NULL) {
3019      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
3020    }
3021    if (EmitSync & 1) {
3022        // set box->dhw = unused_mark (3)
3023        // Force all sync thru slow-path: slow_enter() and slow_exit() 
3024        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;             
3025        masm.cmpptr (rsp, (int32_t)0) ;                        
3026    } else 
3027    if (EmitSync & 2) { 
3028        Label DONE_LABEL ;           
3029        if (UseBiasedLocking) {
3030           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
3031           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3032        }
3033
3034        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword 
3035        masm.orptr (tmpReg, 0x1);
3036        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS 
3037        if (os::is_MP()) { masm.lock();  }
3038        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
3039        masm.jcc(Assembler::equal, DONE_LABEL);
3040        // Recursive locking
3041        masm.subptr(tmpReg, rsp);
3042        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
3043        masm.movptr(Address(boxReg, 0), tmpReg);
3044        masm.bind(DONE_LABEL) ; 
3045    } else {  
3046      // Possible cases that we'll encounter in fast_lock 
3047      // ------------------------------------------------
3048      // * Inflated
3049      //    -- unlocked
3050      //    -- Locked
3051      //       = by self
3052      //       = by other
3053      // * biased
3054      //    -- by Self
3055      //    -- by other
3056      // * neutral
3057      // * stack-locked
3058      //    -- by self
3059      //       = sp-proximity test hits
3060      //       = sp-proximity test generates false-negative
3061      //    -- by other
3062      //
3063
3064      Label IsInflated, DONE_LABEL, PopDone ;
3065
3066      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
3067      // order to reduce the number of conditional branches in the most common cases.
3068      // Beware -- there's a subtle invariant that fetch of the markword
3069      // at [FETCH], below, will never observe a biased encoding (*101b).
3070      // If this invariant is not held we risk exclusion (safety) failure.
3071      if (UseBiasedLocking && !UseOptoBiasInlining) {
3072        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
3073      }
3074
3075      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
3076      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
3077      masm.jccb  (Assembler::notZero, IsInflated) ;
3078
3079      // Attempt stack-locking ...
3080      masm.orptr (tmpReg, 0x1);
3081      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
3082      if (os::is_MP()) { masm.lock();  }
3083      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
3084      if (_counters != NULL) {
3085        masm.cond_inc32(Assembler::equal,
3086                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3087      }
3088      masm.jccb (Assembler::equal, DONE_LABEL);
3089
3090      // Recursive locking
3091      masm.subptr(tmpReg, rsp);
3092      masm.andptr(tmpReg, 0xFFFFF003 );
3093      masm.movptr(Address(boxReg, 0), tmpReg);
3094      if (_counters != NULL) {
3095        masm.cond_inc32(Assembler::equal,
3096                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
3097      }
3098      masm.jmp  (DONE_LABEL) ;
3099
3100      masm.bind (IsInflated) ;
3101
3102      // The object is inflated.
3103      //
3104      // TODO-FIXME: eliminate the ugly use of manifest constants:
3105      //   Use markOopDesc::monitor_value instead of "2".
3106      //   use markOop::unused_mark() instead of "3".
3107      // The tmpReg value is an objectMonitor reference ORed with
3108      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
3109      // objectmonitor pointer by masking off the "2" bit or we can just
3110      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
3111      // field offsets with "-2" to compensate for and annul the low-order tag bit.
3112      //
3113      // I use the latter as it avoids AGI stalls.
3114      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
3115      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
3116      //
3117      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)
3118
3119      // boxReg refers to the on-stack BasicLock in the current frame.
3120      // We'd like to write:
3121      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
3122      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
3123      // additional latency as we have another ST in the store buffer that must drain.
3124
3125      if (EmitSync & 8192) { 
3126         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
3127         masm.get_thread (scrReg) ; 
3128         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2] 
3129         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
3130         if (os::is_MP()) { masm.lock(); } 
3131         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3132      } else 
3133      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
3134         masm.movptr(scrReg, boxReg) ; 
3135         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2] 
3136
3137         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3138         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3139            // prefetchw [eax + Offset(_owner)-2]
3140            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3141         }
3142
3143         if ((EmitSync & 64) == 0) {
3144           // Optimistic form: consider XORL tmpReg,tmpReg
3145           masm.movptr(tmpReg, NULL_WORD) ; 
3146         } else { 
3147           // Can suffer RTS->RTO upgrades on shared or cold $ lines
3148           // Test-And-CAS instead of CAS
3149           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
3150           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
3151           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
3152         }
3153
3154         // Appears unlocked - try to swing _owner from null to non-null.
3155         // Ideally, I'd manifest "Self" with get_thread and then attempt
3156         // to CAS the register containing Self into m->Owner.
3157         // But we don't have enough registers, so instead we can either try to CAS
3158         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
3159         // we later store "Self" into m->Owner.  Transiently storing a stack address
3160         // (rsp or the address of the box) into  m->owner is harmless.
3161         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
3162         if (os::is_MP()) { masm.lock();  }
3163         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ; 
3164         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
3165         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3166         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
3167         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ; 
3168         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success
3169                       
3170         // If the CAS fails we can either retry or pass control to the slow-path.  
3171         // We use the latter tactic.  
3172         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3173         // If the CAS was successful ...
3174         //   Self has acquired the lock
3175         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3176         // Intentional fall-through into DONE_LABEL ...
3177      } else {
3178         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
3179         masm.movptr(boxReg, tmpReg) ; 
3180
3181         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
3182         if ((EmitSync & 2048) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3183            // prefetchw [eax + Offset(_owner)-2]
3184            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
3185         }
3186
3187         if ((EmitSync & 64) == 0) {
3188           // Optimistic form
3189           masm.xorptr  (tmpReg, tmpReg) ; 
3190         } else { 
3191           // Can suffer RTS->RTO upgrades on shared or cold $ lines
3192           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
3193           masm.testptr(tmpReg, tmpReg) ;                   // Locked ? 
3194           masm.jccb  (Assembler::notZero, DONE_LABEL) ;                   
3195         }
3196
3197         // Appears unlocked - try to swing _owner from null to non-null.
3198         // Use either "Self" (in scr) or rsp as thread identity in _owner.
3199         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
3200         masm.get_thread (scrReg) ;
3201         if (os::is_MP()) { masm.lock(); }
3202         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
3203
3204         // If the CAS fails we can either retry or pass control to the slow-path.
3205         // We use the latter tactic.
3206         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
3207         // If the CAS was successful ...
3208         //   Self has acquired the lock
3209         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
3210         // Intentional fall-through into DONE_LABEL ...
3211      }
3212
3213      // DONE_LABEL is a hot target - we'd really like to place it at the
3214      // start of cache line by padding with NOPs.
3215      // See the AMD and Intel software optimization manuals for the
3216      // most efficient "long" NOP encodings.
3217      // Unfortunately none of our alignment mechanisms suffice.
3218      masm.bind(DONE_LABEL);
3219
3220      // Avoid branch-to-branch on AMD processors
3221      // This appears to be superstition.
3222      if (EmitSync & 32) masm.nop() ;
3223
3224
3225      // At DONE_LABEL the icc ZFlag is set as follows ...
3226      // Fast_Unlock uses the same protocol.
3227      // ZFlag == 1 -> Success
3228      // ZFlag == 0 -> Failure - force control through the slow-path
3229    }
3230  %}
3231
3232  // obj: object to unlock
3233  // box: box address (displaced header location), killed.  Must be EAX.
3234  // rbx,: killed tmp; cannot be obj nor box.
3235  //
3236  // Some commentary on balanced locking:
3237  //
3238  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3239  // Methods that don't have provably balanced locking are forced to run in the
3240  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3241  // The interpreter provides two properties:
3242  // I1:  At return-time the interpreter automatically and quietly unlocks any
3243  //      objects acquired the current activation (frame).  Recall that the
3244  //      interpreter maintains an on-stack list of locks currently held by
3245  //      a frame.
3246  // I2:  If a method attempts to unlock an object that is not held by the
3247  //      the frame the interpreter throws IMSX.
3248  //
3249  // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3250  // B() doesn't have provably balanced locking so it runs in the interpreter.
3251  // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
3252  // is still locked by A().
3253  //
3254  // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
3255  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3256  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
3257  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3258
3259  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
3260
3261    Register objReg = as_Register($obj$$reg);
3262    Register boxReg = as_Register($box$$reg);
3263    Register tmpReg = as_Register($tmp$$reg);
3264
3265    guarantee (objReg != boxReg, "") ;
3266    guarantee (objReg != tmpReg, "") ;
3267    guarantee (boxReg != tmpReg, "") ;
3268    guarantee (boxReg == as_Register(EAX_enc), "") ;
3269    MacroAssembler masm(&cbuf);
3270
3271    if (EmitSync & 4) {
3272      // Disable - inhibit all inlining.  Force control through the slow-path
3273      masm.cmpptr (rsp, 0) ; 
3274    } else 
3275    if (EmitSync & 8) {
3276      Label DONE_LABEL ;
3277      if (UseBiasedLocking) {
3278         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3279      }
3280      // classic stack-locking code ...
3281      masm.movptr(tmpReg, Address(boxReg, 0)) ;
3282      masm.testptr(tmpReg, tmpReg) ;
3283      masm.jcc   (Assembler::zero, DONE_LABEL) ;
3284      if (os::is_MP()) { masm.lock(); }
3285      masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
3286      masm.bind(DONE_LABEL);
3287    } else {
3288      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;
3289
3290      // Critically, the biased locking test must have precedence over
3291      // and appear before the (box->dhw == 0) recursive stack-lock test.
3292      if (UseBiasedLocking && !UseOptoBiasInlining) {
3293         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
3294      }
3295      
3296      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
3297      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
3298      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock
3299
3300      masm.testptr(tmpReg, 0x02) ;                     // Inflated? 
3301      masm.jccb  (Assembler::zero, Stacked) ;
3302
3303      masm.bind  (Inflated) ;
3304      // It's inflated.
3305      // Despite our balanced locking property we still check that m->_owner == Self
3306      // as java routines or native JNI code called by this thread might
3307      // have released the lock.
3308      // Refer to the comments in synchronizer.cpp for how we might encode extra
3309      // state in _succ so we can avoid fetching EntryList|cxq.
3310      //
3311      // I'd like to add more cases in fast_lock() and fast_unlock() --
3312      // such as recursive enter and exit -- but we have to be wary of
3313      // I$ bloat, T$ effects and BP$ effects.
3314      //
3315      // If there's no contention try a 1-0 exit.  That is, exit without
3316      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
3317      // we detect and recover from the race that the 1-0 exit admits.
3318      //
3319      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
3320      // before it STs null into _owner, releasing the lock.  Updates
3321      // to data protected by the critical section must be visible before
3322      // we drop the lock (and thus before any other thread could acquire
3323      // the lock and observe the fields protected by the lock).
3324      // IA32's memory-model is SPO, so STs are ordered with respect to
3325      // each other and there's no need for an explicit barrier (fence).
3326      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.
3327
3328      masm.get_thread (boxReg) ;
3329      if ((EmitSync & 4096) && VM_Version::supports_3dnow_prefetch() && os::is_MP()) {
3330        // prefetchw [ebx + Offset(_owner)-2]
3331        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
3332      }
3333
3334      // Note that we could employ various encoding schemes to reduce
3335      // the number of loads below (currently 4) to just 2 or 3.
3336      // Refer to the comments in synchronizer.cpp.
3337      // In practice the chain of fetches doesn't seem to impact performance, however.
3338      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
3339         // Attempt to reduce branch density - AMD's branch predictor.
3340         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3341         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3342         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3343         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3344         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3345         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3346         masm.jmpb  (DONE_LABEL) ; 
3347      } else { 
3348         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;  
3349         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
3350         masm.jccb  (Assembler::notZero, DONE_LABEL) ; 
3351         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ; 
3352         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ; 
3353         masm.jccb  (Assembler::notZero, CheckSucc) ; 
3354         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3355         masm.jmpb  (DONE_LABEL) ; 
3356      }
3357
3358      // The Following code fragment (EmitSync & 65536) improves the performance of
3359      // contended applications and contended synchronization microbenchmarks.
3360      // Unfortunately the emission of the code - even though not executed - causes regressions
3361      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
3362      // with an equal number of never-executed NOPs results in the same regression.
3363      // We leave it off by default.
3364
3365      if ((EmitSync & 65536) != 0) {
3366         Label LSuccess, LGoSlowPath ;
3367
3368         masm.bind  (CheckSucc) ;
3369
3370         // Optional pre-test ... it's safe to elide this
3371         if ((EmitSync & 16) == 0) { 
3372            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3373            masm.jccb  (Assembler::zero, LGoSlowPath) ; 
3374         }
3375
3376         // We have a classic Dekker-style idiom:
3377         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
3378         // There are a number of ways to implement the barrier:
3379         // (1) lock:andl &m->_owner, 0
3380         //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
3381         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
3382         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
3383         // (2) If supported, an explicit MFENCE is appealing.
3384         //     In older IA32 processors MFENCE is slower than lock:add or xchg
3385         //     particularly if the write-buffer is full as might be the case if
3386         //     if stores closely precede the fence or fence-equivalent instruction.
3387         //     In more modern implementations MFENCE appears faster, however.
3388         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
3389         //     The $lines underlying the top-of-stack should be in M-state.
3390         //     The locked add instruction is serializing, of course.
3391         // (4) Use xchg, which is serializing
3392         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
3393         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
3394         //     The integer condition codes will tell us if succ was 0.
3395         //     Since _succ and _owner should reside in the same $line and
3396         //     we just stored into _owner, it's likely that the $line
3397         //     remains in M-state for the lock:orl.
3398         //
3399         // We currently use (3), although it's likely that switching to (2)
3400         // is correct for the future.
3401            
3402         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ; 
3403         if (os::is_MP()) { 
3404            if (VM_Version::supports_sse2() && 1 == FenceInstruction) { 
3405              masm.mfence();
3406            } else { 
3407              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ; 
3408            }
3409         }
3410         // Ratify _succ remains non-null
3411         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ; 
3412         masm.jccb  (Assembler::notZero, LSuccess) ; 
3413
3414         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
3415         if (os::is_MP()) { masm.lock(); }
3416         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
3417         masm.jccb  (Assembler::notEqual, LSuccess) ;
3418         // Since we're low on registers we installed rsp as a placeholding in _owner.
3419         // Now install Self over rsp.  This is safe as we're transitioning from
3420         // non-null to non=null
3421         masm.get_thread (boxReg) ;
3422         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
3423         // Intentional fall-through into LGoSlowPath ...
3424
3425         masm.bind  (LGoSlowPath) ; 
3426         masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
3427         masm.jmpb  (DONE_LABEL) ; 
3428
3429         masm.bind  (LSuccess) ; 
3430         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
3431         masm.jmpb  (DONE_LABEL) ; 
3432      }
3433
3434      masm.bind (Stacked) ;
3435      // It's not inflated and it's not recursively stack-locked and it's not biased.
3436      // It must be stack-locked.
3437      // Try to reset the header to displaced header.
3438      // The "box" value on the stack is stable, so we can reload
3439      // and be assured we observe the same value as above.
3440      masm.movptr(tmpReg, Address(boxReg, 0)) ;
3441      if (os::is_MP()) {   masm.lock();    }
3442      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
3443      // Intention fall-thru into DONE_LABEL
3444
3445
3446      // DONE_LABEL is a hot target - we'd really like to place it at the
3447      // start of cache line by padding with NOPs.
3448      // See the AMD and Intel software optimization manuals for the
3449      // most efficient "long" NOP encodings.
3450      // Unfortunately none of our alignment mechanisms suffice.
3451      if ((EmitSync & 65536) == 0) {
3452         masm.bind (CheckSucc) ;
3453      }
3454      masm.bind(DONE_LABEL);
3455
3456      // Avoid branch to branch on AMD processors
3457      if (EmitSync & 32768) { masm.nop() ; }
3458    }
3459  %}
3460
3461
3462  enc_class enc_pop_rdx() %{
3463    emit_opcode(cbuf,0x5A);
3464  %}
3465
3466  enc_class enc_rethrow() %{
3467    cbuf.set_insts_mark();
3468    emit_opcode(cbuf, 0xE9);        // jmp    entry
3469    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
3470                   runtime_call_Relocation::spec(), RELOC_IMM32 );
3471  %}
3472
3473
3474  // Convert a double to an int.  Java semantics require we do complex
3475  // manglelations in the corner cases.  So we set the rounding mode to
3476  // 'zero', store the darned double down as an int, and reset the
3477  // rounding mode to 'nearest'.  The hardware throws an exception which
3478  // patches up the correct value directly to the stack.
3479  enc_class DPR2I_encoding( regDPR src ) %{
3480    // Flip to round-to-zero mode.  We attempted to allow invalid-op
3481    // exceptions here, so that a NAN or other corner-case value will
3482    // thrown an exception (but normal values get converted at full speed).
3483    // However, I2C adapters and other float-stack manglers leave pending
3484    // invalid-op exceptions hanging.  We would have to clear them before
3485    // enabling them and that is more expensive than just testing for the
3486    // invalid value Intel stores down in the corner cases.
3487    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3488    emit_opcode(cbuf,0x2D);
3489    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3490    // Allocate a word
3491    emit_opcode(cbuf,0x83);            // SUB ESP,4
3492    emit_opcode(cbuf,0xEC);
3493    emit_d8(cbuf,0x04);
3494    // Encoding assumes a double has been pushed into FPR0.
3495    // Store down the double as an int, popping the FPU stack
3496    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3497    emit_opcode(cbuf,0x1C);
3498    emit_d8(cbuf,0x24);
3499    // Restore the rounding mode; mask the exception
3500    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3501    emit_opcode(cbuf,0x2D);
3502    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3503        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3504        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3505
3506    // Load the converted int; adjust CPU stack
3507    emit_opcode(cbuf,0x58);       // POP EAX
3508    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
3509    emit_d32   (cbuf,0x80000000); //         0x80000000
3510    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3511    emit_d8    (cbuf,0x07);       // Size of slow_call
3512    // Push src onto stack slow-path
3513    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3514    emit_d8    (cbuf,0xC0-1+$src$$reg );
3515    // CALL directly to the runtime
3516    cbuf.set_insts_mark();
3517    emit_opcode(cbuf,0xE8);       // Call into runtime
3518    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3519    // Carry on here...
3520  %}
3521
3522  enc_class DPR2L_encoding( regDPR src ) %{
3523    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3524    emit_opcode(cbuf,0x2D);
3525    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3526    // Allocate a word
3527    emit_opcode(cbuf,0x83);            // SUB ESP,8
3528    emit_opcode(cbuf,0xEC);
3529    emit_d8(cbuf,0x08);
3530    // Encoding assumes a double has been pushed into FPR0.
3531    // Store down the double as a long, popping the FPU stack
3532    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3533    emit_opcode(cbuf,0x3C);
3534    emit_d8(cbuf,0x24);
3535    // Restore the rounding mode; mask the exception
3536    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3537    emit_opcode(cbuf,0x2D);
3538    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3539        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3540        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3541
3542    // Load the converted int; adjust CPU stack
3543    emit_opcode(cbuf,0x58);       // POP EAX
3544    emit_opcode(cbuf,0x5A);       // POP EDX
3545    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3546    emit_d8    (cbuf,0xFA);       // rdx
3547    emit_d32   (cbuf,0x80000000); //         0x80000000
3548    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3549    emit_d8    (cbuf,0x07+4);     // Size of slow_call
3550    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3551    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3552    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3553    emit_d8    (cbuf,0x07);       // Size of slow_call
3554    // Push src onto stack slow-path
3555    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3556    emit_d8    (cbuf,0xC0-1+$src$$reg );
3557    // CALL directly to the runtime
3558    cbuf.set_insts_mark();
3559    emit_opcode(cbuf,0xE8);       // Call into runtime
3560    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3561    // Carry on here...
3562  %}
3563
3564  enc_class FMul_ST_reg( eRegFPR src1 ) %{
3565    // Operand was loaded from memory into fp ST (stack top)
3566    // FMUL   ST,$src  /* D8 C8+i */
3567    emit_opcode(cbuf, 0xD8);
3568    emit_opcode(cbuf, 0xC8 + $src1$$reg);
3569  %}
3570
3571  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3572    // FADDP  ST,src2  /* D8 C0+i */
3573    emit_opcode(cbuf, 0xD8);
3574    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3575    //could use FADDP  src2,fpST  /* DE C0+i */
3576  %}
3577
3578  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3579    // FADDP  src2,ST  /* DE C0+i */
3580    emit_opcode(cbuf, 0xDE);
3581    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3582  %}
3583
3584  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3585    // Operand has been loaded into fp ST (stack top)
3586      // FSUB   ST,$src1
3587      emit_opcode(cbuf, 0xD8);
3588      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3589
3590      // FDIV
3591      emit_opcode(cbuf, 0xD8);
3592      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3593  %}
3594
3595  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3596    // Operand was loaded from memory into fp ST (stack top)
3597    // FADD   ST,$src  /* D8 C0+i */
3598    emit_opcode(cbuf, 0xD8);
3599    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3600
3601    // FMUL  ST,src2  /* D8 C*+i */
3602    emit_opcode(cbuf, 0xD8);
3603    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3604  %}
3605
3606
3607  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3608    // Operand was loaded from memory into fp ST (stack top)
3609    // FADD   ST,$src  /* D8 C0+i */
3610    emit_opcode(cbuf, 0xD8);
3611    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3612
3613    // FMULP  src2,ST  /* DE C8+i */
3614    emit_opcode(cbuf, 0xDE);
3615    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3616  %}
3617
3618  // Atomically load the volatile long
3619  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3620    emit_opcode(cbuf,0xDF);
3621    int rm_byte_opcode = 0x05;
3622    int base     = $mem$$base;
3623    int index    = $mem$$index;
3624    int scale    = $mem$$scale;
3625    int displace = $mem$$disp;
3626    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3627    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3628    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3629  %}
3630
3631  // Volatile Store Long.  Must be atomic, so move it into
3632  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3633  // target address before the store (for null-ptr checks)
3634  // so the memory operand is used twice in the encoding.
3635  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3636    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3637    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3638    emit_opcode(cbuf,0xDF);
3639    int rm_byte_opcode = 0x07;
3640    int base     = $mem$$base;
3641    int index    = $mem$$index;
3642    int scale    = $mem$$scale;
3643    int displace = $mem$$disp;
3644    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
3645    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
3646  %}
3647
3648  // Safepoint Poll.  This polls the safepoint page, and causes an
3649  // exception if it is not readable. Unfortunately, it kills the condition code
3650  // in the process
3651  // We current use TESTL [spp],EDI
3652  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3653
3654  enc_class Safepoint_Poll() %{
3655    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3656    emit_opcode(cbuf,0x85);
3657    emit_rm (cbuf, 0x0, 0x7, 0x5);
3658    emit_d32(cbuf, (intptr_t)os::get_polling_page());
3659  %}
3660%}
3661
3662
3663//----------FRAME--------------------------------------------------------------
3664// Definition of frame structure and management information.
3665//
3666//  S T A C K   L A Y O U T    Allocators stack-slot number
3667//                             |   (to get allocators register number
3668//  G  Owned by    |        |  v    add OptoReg::stack0())
3669//  r   CALLER     |        |
3670//  o     |        +--------+      pad to even-align allocators stack-slot
3671//  w     V        |  pad0  |        numbers; owned by CALLER
3672//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3673//  h     ^        |   in   |  5
3674//        |        |  args  |  4   Holes in incoming args owned by SELF
3675//  |     |        |        |  3
3676//  |     |        +--------+
3677//  V     |        | old out|      Empty on Intel, window on Sparc
3678//        |    old |preserve|      Must be even aligned.
3679//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3680//        |        |   in   |  3   area for Intel ret address
3681//     Owned by    |preserve|      Empty on Sparc.
3682//       SELF      +--------+
3683//        |        |  pad2  |  2   pad to align old SP
3684//        |        +--------+  1
3685//        |        | locks  |  0
3686//        |        +--------+----> OptoReg::stack0(), even aligned
3687//        |        |  pad1  | 11   pad to align new SP
3688//        |        +--------+
3689//        |        |        | 10
3690//        |        | spills |  9   spills
3691//        V        |        |  8   (pad0 slot for callee)
3692//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3693//        ^        |  out   |  7
3694//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3695//     Owned by    +--------+
3696//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3697//        |    new |preserve|      Must be even-aligned.
3698//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3699//        |        |        |
3700//
3701// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3702//         known from SELF's arguments and the Java calling convention.
3703//         Region 6-7 is determined per call site.
3704// Note 2: If the calling convention leaves holes in the incoming argument
3705//         area, those holes are owned by SELF.  Holes in the outgoing area
3706//         are owned by the CALLEE.  Holes should not be nessecary in the
3707//         incoming area, as the Java calling convention is completely under
3708//         the control of the AD file.  Doubles can be sorted and packed to
3709//         avoid holes.  Holes in the outgoing arguments may be nessecary for
3710//         varargs C calling conventions.
3711// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3712//         even aligned with pad0 as needed.
3713//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3714//         region 6-11 is even aligned; it may be padded out more so that
3715//         the region from SP to FP meets the minimum stack alignment.
3716
3717frame %{
3718  // What direction does stack grow in (assumed to be same for C & Java)
3719  stack_direction(TOWARDS_LOW);
3720
3721  // These three registers define part of the calling convention
3722  // between compiled code and the interpreter.
3723  inline_cache_reg(EAX);                // Inline Cache Register
3724  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3725
3726  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3727  cisc_spilling_operand_name(indOffset32);
3728
3729  // Number of stack slots consumed by locking an object
3730  sync_stack_slots(1);
3731
3732  // Compiled code's Frame Pointer
3733  frame_pointer(ESP);
3734  // Interpreter stores its frame pointer in a register which is
3735  // stored to the stack by I2CAdaptors.
3736  // I2CAdaptors convert from interpreted java to compiled java.
3737  interpreter_frame_pointer(EBP);
3738
3739  // Stack alignment requirement
3740  // Alignment size in bytes (128-bit -> 16 bytes)
3741  stack_alignment(StackAlignmentInBytes);
3742
3743  // Number of stack slots between incoming argument block and the start of
3744  // a new frame.  The PROLOG must add this many slots to the stack.  The
3745  // EPILOG must remove this many slots.  Intel needs one slot for
3746  // return address and one for rbp, (must save rbp)
3747  in_preserve_stack_slots(2+VerifyStackAtCalls);
3748
3749  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3750  // for calls to C.  Supports the var-args backing area for register parms.
3751  varargs_C_out_slots_killed(0);
3752
3753  // The after-PROLOG location of the return address.  Location of
3754  // return address specifies a type (REG or STACK) and a number
3755  // representing the register number (i.e. - use a register name) or
3756  // stack slot.
3757  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3758  // Otherwise, it is above the locks and verification slot and alignment word
3759  return_addr(STACK - 1 +
3760              round_to((Compile::current()->in_preserve_stack_slots() +
3761                        Compile::current()->fixed_slots()),
3762                       stack_alignment_in_slots()));
3763
3764  // Body of function which returns an integer array locating
3765  // arguments either in registers or in stack slots.  Passed an array
3766  // of ideal registers called "sig" and a "length" count.  Stack-slot
3767  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3768  // arguments for a CALLEE.  Incoming stack arguments are
3769  // automatically biased by the preserve_stack_slots field above.
3770  calling_convention %{
3771    // No difference between ingoing/outgoing just pass false
3772    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3773  %}
3774
3775
3776  // Body of function which returns an integer array locating
3777  // arguments either in registers or in stack slots.  Passed an array
3778  // of ideal registers called "sig" and a "length" count.  Stack-slot
3779  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3780  // arguments for a CALLEE.  Incoming stack arguments are
3781  // automatically biased by the preserve_stack_slots field above.
3782  c_calling_convention %{
3783    // This is obviously always outgoing
3784    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
3785  %}
3786
3787  // Location of C & interpreter return values
3788  c_return_value %{
3789    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3790    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3791    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3792
3793    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3794    // that C functions return float and double results in XMM0.
3795    if( ideal_reg == Op_RegD && UseSSE>=2 )
3796      return OptoRegPair(XMM0b_num,XMM0a_num);
3797    if( ideal_reg == Op_RegF && UseSSE>=2 )
3798      return OptoRegPair(OptoReg::Bad,XMM0a_num);
3799
3800    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3801  %}
3802
3803  // Location of return values
3804  return_value %{
3805    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3806    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3807    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3808    if( ideal_reg == Op_RegD && UseSSE>=2 )
3809      return OptoRegPair(XMM0b_num,XMM0a_num);
3810    if( ideal_reg == Op_RegF && UseSSE>=1 )
3811      return OptoRegPair(OptoReg::Bad,XMM0a_num);
3812    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3813  %}
3814
3815%}
3816
3817//----------ATTRIBUTES---------------------------------------------------------
3818//----------Operand Attributes-------------------------------------------------
3819op_attrib op_cost(0);        // Required cost attribute
3820
3821//----------Instruction Attributes---------------------------------------------
3822ins_attrib ins_cost(100);       // Required cost attribute
3823ins_attrib ins_size(8);         // Required size attribute (in bits)
3824ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3825                                // non-matching short branch variant of some
3826                                                            // long branch?
3827ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3828                                // specifies the alignment that some part of the instruction (not
3829                                // necessarily the start) requires.  If > 1, a compute_padding()
3830                                // function must be provided for the instruction
3831
3832//----------OPERANDS-----------------------------------------------------------
3833// Operand definitions must precede instruction definitions for correct parsing
3834// in the ADLC because operands constitute user defined types which are used in
3835// instruction definitions.
3836
3837//----------Simple Operands----------------------------------------------------
3838// Immediate Operands
3839// Integer Immediate
3840operand immI() %{
3841  match(ConI);
3842
3843  op_cost(10);
3844  format %{ %}
3845  interface(CONST_INTER);
3846%}
3847
3848// Constant for test vs zero
3849operand immI0() %{
3850  predicate(n->get_int() == 0);
3851  match(ConI);
3852
3853  op_cost(0);
3854  format %{ %}
3855  interface(CONST_INTER);
3856%}
3857
3858// Constant for increment
3859operand immI1() %{
3860  predicate(n->get_int() == 1);
3861  match(ConI);
3862
3863  op_cost(0);
3864  format %{ %}
3865  interface(CONST_INTER);
3866%}
3867
3868// Constant for decrement
3869operand immI_M1() %{
3870  predicate(n->get_int() == -1);
3871  match(ConI);
3872
3873  op_cost(0);
3874  format %{ %}
3875  interface(CONST_INTER);
3876%}
3877
3878// Valid scale values for addressing modes
3879operand immI2() %{
3880  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3881  match(ConI);
3882
3883  format %{ %}
3884  interface(CONST_INTER);
3885%}
3886
3887operand immI8() %{
3888  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3889  match(ConI);
3890
3891  op_cost(5);
3892  format %{ %}
3893  interface(CONST_INTER);
3894%}
3895
3896operand immI16() %{
3897  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3898  match(ConI);
3899
3900  op_cost(10);
3901  format %{ %}
3902  interface(CONST_INTER);
3903%}
3904
3905// Constant for long shifts
3906operand immI_32() %{
3907  predicate( n->get_int() == 32 );
3908  match(ConI);
3909
3910  op_cost(0);
3911  format %{ %}
3912  interface(CONST_INTER);
3913%}
3914
3915operand immI_1_31() %{
3916  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3917  match(ConI);
3918
3919  op_cost(0);
3920  format %{ %}
3921  interface(CONST_INTER);
3922%}
3923
3924operand immI_32_63() %{
3925  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3926  match(ConI);
3927  op_cost(0);
3928
3929  format %{ %}
3930  interface(CONST_INTER);
3931%}
3932
3933operand immI_1() %{
3934  predicate( n->get_int() == 1 );
3935  match(ConI);
3936
3937  op_cost(0);
3938  format %{ %}
3939  interface(CONST_INTER);
3940%}
3941
3942operand immI_2() %{
3943  predicate( n->get_int() == 2 );
3944  match(ConI);
3945
3946  op_cost(0);
3947  format %{ %}
3948  interface(CONST_INTER);
3949%}
3950
3951operand immI_3() %{
3952  predicate( n->get_int() == 3 );
3953  match(ConI);
3954
3955  op_cost(0);
3956  format %{ %}
3957  interface(CONST_INTER);
3958%}
3959
3960// Pointer Immediate
3961operand immP() %{
3962  match(ConP);
3963
3964  op_cost(10);
3965  format %{ %}
3966  interface(CONST_INTER);
3967%}
3968
3969// NULL Pointer Immediate
3970operand immP0() %{
3971  predicate( n->get_ptr() == 0 );
3972  match(ConP);
3973  op_cost(0);
3974
3975  format %{ %}
3976  interface(CONST_INTER);
3977%}
3978
3979// Long Immediate
3980operand immL() %{
3981  match(ConL);
3982
3983  op_cost(20);
3984  format %{ %}
3985  interface(CONST_INTER);
3986%}
3987
3988// Long Immediate zero
3989operand immL0() %{
3990  predicate( n->get_long() == 0L );
3991  match(ConL);
3992  op_cost(0);
3993
3994  format %{ %}
3995  interface(CONST_INTER);
3996%}
3997
3998// Long Immediate zero
3999operand immL_M1() %{
4000  predicate( n->get_long() == -1L );
4001  match(ConL);
4002  op_cost(0);
4003
4004  format %{ %}
4005  interface(CONST_INTER);
4006%}
4007
4008// Long immediate from 0 to 127.
4009// Used for a shorter form of long mul by 10.
4010operand immL_127() %{
4011  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
4012  match(ConL);
4013  op_cost(0);
4014
4015  format %{ %}
4016  interface(CONST_INTER);
4017%}
4018
4019// Long Immediate: low 32-bit mask
4020operand immL_32bits() %{
4021  predicate(n->get_long() == 0xFFFFFFFFL);
4022  match(ConL);
4023  op_cost(0);
4024
4025  format %{ %}
4026  interface(CONST_INTER);
4027%}
4028
4029// Long Immediate: low 32-bit mask
4030operand immL32() %{
4031  predicate(n->get_long() == (int)(n->get_long()));
4032  match(ConL);
4033  op_cost(20);
4034
4035  format %{ %}
4036  interface(CONST_INTER);
4037%}
4038
4039//Double Immediate zero
4040operand immDPR0() %{
4041  // Do additional (and counter-intuitive) test against NaN to work around VC++
4042  // bug that generates code such that NaNs compare equal to 0.0
4043  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
4044  match(ConD);
4045
4046  op_cost(5);
4047  format %{ %}
4048  interface(CONST_INTER);
4049%}
4050
4051// Double Immediate one
4052operand immDPR1() %{
4053  predicate( UseSSE<=1 && n->getd() == 1.0 );
4054  match(ConD);
4055
4056  op_cost(5);
4057  format %{ %}
4058  interface(CONST_INTER);
4059%}
4060
4061// Double Immediate
4062operand immDPR() %{
4063  predicate(UseSSE<=1);
4064  match(ConD);
4065
4066  op_cost(5);
4067  format %{ %}
4068  interface(CONST_INTER);
4069%}
4070
4071operand immD() %{
4072  predicate(UseSSE>=2);
4073  match(ConD);
4074
4075  op_cost(5);
4076  format %{ %}
4077  interface(CONST_INTER);
4078%}
4079
4080// Double Immediate zero
4081operand immD0() %{
4082  // Do additional (and counter-intuitive) test against NaN to work around VC++
4083  // bug that generates code such that NaNs compare equal to 0.0 AND do not
4084  // compare equal to -0.0.
4085  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
4086  match(ConD);
4087
4088  format %{ %}
4089  interface(CONST_INTER);
4090%}
4091
4092// Float Immediate zero
4093operand immFPR0() %{
4094  predicate(UseSSE == 0 && n->getf() == 0.0F);
4095  match(ConF);
4096
4097  op_cost(5);
4098  format %{ %}
4099  interface(CONST_INTER);
4100%}
4101
4102// Float Immediate one
4103operand immFPR1() %{
4104  predicate(UseSSE == 0 && n->getf() == 1.0F);
4105  match(ConF);
4106
4107  op_cost(5);
4108  format %{ %}
4109  interface(CONST_INTER);
4110%}
4111
4112// Float Immediate
4113operand immFPR() %{
4114  predicate( UseSSE == 0 );
4115  match(ConF);
4116
4117  op_cost(5);
4118  format %{ %}
4119  interface(CONST_INTER);
4120%}
4121
4122// Float Immediate
4123operand immF() %{
4124  predicate(UseSSE >= 1);
4125  match(ConF);
4126
4127  op_cost(5);
4128  format %{ %}
4129  interface(CONST_INTER);
4130%}
4131
4132// Float Immediate zero.  Zero and not -0.0
4133operand immF0() %{
4134  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
4135  match(ConF);
4136
4137  op_cost(5);
4138  format %{ %}
4139  interface(CONST_INTER);
4140%}
4141
4142// Immediates for special shifts (sign extend)
4143
4144// Constants for increment
4145operand immI_16() %{
4146  predicate( n->get_int() == 16 );
4147  match(ConI);
4148
4149  format %{ %}
4150  interface(CONST_INTER);
4151%}
4152
4153operand immI_24() %{
4154  predicate( n->get_int() == 24 );
4155  match(ConI);
4156
4157  format %{ %}
4158  interface(CONST_INTER);
4159%}
4160
4161// Constant for byte-wide masking
4162operand immI_255() %{
4163  predicate( n->get_int() == 255 );
4164  match(ConI);
4165
4166  format %{ %}
4167  interface(CONST_INTER);
4168%}
4169
4170// Constant for short-wide masking
4171operand immI_65535() %{
4172  predicate(n->get_int() == 65535);
4173  match(ConI);
4174
4175  format %{ %}
4176  interface(CONST_INTER);
4177%}
4178
4179// Register Operands
4180// Integer Register
4181operand eRegI() %{
4182  constraint(ALLOC_IN_RC(e_reg));
4183  match(RegI);
4184  match(xRegI);
4185  match(eAXRegI);
4186  match(eBXRegI);
4187  match(eCXRegI);
4188  match(eDXRegI);
4189  match(eDIRegI);
4190  match(eSIRegI);
4191
4192  format %{ %}
4193  interface(REG_INTER);
4194%}
4195
4196// Subset of Integer Register
4197operand xRegI(eRegI reg) %{
4198  constraint(ALLOC_IN_RC(x_reg));
4199  match(reg);
4200  match(eAXRegI);
4201  match(eBXRegI);
4202  match(eCXRegI);
4203  match(eDXRegI);
4204
4205  format %{ %}
4206  interface(REG_INTER);
4207%}
4208
4209// Special Registers
4210operand eAXRegI(xRegI reg) %{
4211  constraint(ALLOC_IN_RC(eax_reg));
4212  match(reg);
4213  match(eRegI);
4214
4215  format %{ "EAX" %}
4216  interface(REG_INTER);
4217%}
4218
4219// Special Registers
4220operand eBXRegI(xRegI reg) %{
4221  constraint(ALLOC_IN_RC(ebx_reg));
4222  match(reg);
4223  match(eRegI);
4224
4225  format %{ "EBX" %}
4226  interface(REG_INTER);
4227%}
4228
4229operand eCXRegI(xRegI reg) %{
4230  constraint(ALLOC_IN_RC(ecx_reg));
4231  match(reg);
4232  match(eRegI);
4233
4234  format %{ "ECX" %}
4235  interface(REG_INTER);
4236%}
4237
4238operand eDXRegI(xRegI reg) %{
4239  constraint(ALLOC_IN_RC(edx_reg));
4240  match(reg);
4241  match(eRegI);
4242
4243  format %{ "EDX" %}
4244  interface(REG_INTER);
4245%}
4246
4247operand eDIRegI(xRegI reg) %{
4248  constraint(ALLOC_IN_RC(edi_reg));
4249  match(reg);
4250  match(eRegI);
4251
4252  format %{ "EDI" %}
4253  interface(REG_INTER);
4254%}
4255
4256operand naxRegI() %{
4257  constraint(ALLOC_IN_RC(nax_reg));
4258  match(RegI);
4259  match(eCXRegI);
4260  match(eDXRegI);
4261  match(eSIRegI);
4262  match(eDIRegI);
4263
4264  format %{ %}
4265  interface(REG_INTER);
4266%}
4267
4268operand nadxRegI() %{
4269  constraint(ALLOC_IN_RC(nadx_reg));
4270  match(RegI);
4271  match(eBXRegI);
4272  match(eCXRegI);
4273  match(eSIRegI);
4274  match(eDIRegI);
4275
4276  format %{ %}
4277  interface(REG_INTER);
4278%}
4279
4280operand ncxRegI() %{
4281  constraint(ALLOC_IN_RC(ncx_reg));
4282  match(RegI);
4283  match(eAXRegI);
4284  match(eDXRegI);
4285  match(eSIRegI);
4286  match(eDIRegI);
4287
4288  format %{ %}
4289  interface(REG_INTER);
4290%}
4291
4292// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
4293// //
4294operand eSIRegI(xRegI reg) %{
4295   constraint(ALLOC_IN_RC(esi_reg));
4296   match(reg);
4297   match(eRegI);
4298
4299   format %{ "ESI" %}
4300   interface(REG_INTER);
4301%}
4302
4303// Pointer Register
4304operand anyRegP() %{
4305  constraint(ALLOC_IN_RC(any_reg));
4306  match(RegP);
4307  match(eAXRegP);
4308  match(eBXRegP);
4309  match(eCXRegP);
4310  match(eDIRegP);
4311  match(eRegP);
4312
4313  format %{ %}
4314  interface(REG_INTER);
4315%}
4316
4317operand eRegP() %{
4318  constraint(ALLOC_IN_RC(e_reg));
4319  match(RegP);
4320  match(eAXRegP);
4321  match(eBXRegP);
4322  match(eCXRegP);
4323  match(eDIRegP);
4324
4325  format %{ %}
4326  interface(REG_INTER);
4327%}
4328
4329// On windows95, EBP is not safe to use for implicit null tests.
4330operand eRegP_no_EBP() %{
4331  constraint(ALLOC_IN_RC(e_reg_no_rbp));
4332  match(RegP);
4333  match(eAXRegP);
4334  match(eBXRegP);
4335  match(eCXRegP);
4336  match(eDIRegP);
4337
4338  op_cost(100);
4339  format %{ %}
4340  interface(REG_INTER);
4341%}
4342
4343operand naxRegP() %{
4344  constraint(ALLOC_IN_RC(nax_reg));
4345  match(RegP);
4346  match(eBXRegP);
4347  match(eDXRegP);
4348  match(eCXRegP);
4349  match(eSIRegP);
4350  match(eDIRegP);
4351
4352  format %{ %}
4353  interface(REG_INTER);
4354%}
4355
4356operand nabxRegP() %{
4357  constraint(ALLOC_IN_RC(nabx_reg));
4358  match(RegP);
4359  match(eCXRegP);
4360  match(eDXRegP);
4361  match(eSIRegP);
4362  match(eDIRegP);
4363
4364  format %{ %}
4365  interface(REG_INTER);
4366%}
4367
4368operand pRegP() %{
4369  constraint(ALLOC_IN_RC(p_reg));
4370  match(RegP);
4371  match(eBXRegP);
4372  match(eDXRegP);
4373  match(eSIRegP);
4374  match(eDIRegP);
4375
4376  format %{ %}
4377  interface(REG_INTER);
4378%}
4379
4380// Special Registers
4381// Return a pointer value
4382operand eAXRegP(eRegP reg) %{
4383  constraint(ALLOC_IN_RC(eax_reg));
4384  match(reg);
4385  format %{ "EAX" %}
4386  interface(REG_INTER);
4387%}
4388
4389// Used in AtomicAdd
4390operand eBXRegP(eRegP reg) %{
4391  constraint(ALLOC_IN_RC(ebx_reg));
4392  match(reg);
4393  format %{ "EBX" %}
4394  interface(REG_INTER);
4395%}
4396
4397// Tail-call (interprocedural jump) to interpreter
4398operand eCXRegP(eRegP reg) %{
4399  constraint(ALLOC_IN_RC(ecx_reg));
4400  match(reg);
4401  format %{ "ECX" %}
4402  interface(REG_INTER);
4403%}
4404
4405operand eSIRegP(eRegP reg) %{
4406  constraint(ALLOC_IN_RC(esi_reg));
4407  match(reg);
4408  format %{ "ESI" %}
4409  interface(REG_INTER);
4410%}
4411
4412// Used in rep stosw
4413operand eDIRegP(eRegP reg) %{
4414  constraint(ALLOC_IN_RC(edi_reg));
4415  match(reg);
4416  format %{ "EDI" %}
4417  interface(REG_INTER);
4418%}
4419
4420operand eBPRegP() %{
4421  constraint(ALLOC_IN_RC(ebp_reg));
4422  match(RegP);
4423  format %{ "EBP" %}
4424  interface(REG_INTER);
4425%}
4426
4427operand eRegL() %{
4428  constraint(ALLOC_IN_RC(long_reg));
4429  match(RegL);
4430  match(eADXRegL);
4431
4432  format %{ %}
4433  interface(REG_INTER);
4434%}
4435
4436operand eADXRegL( eRegL reg ) %{
4437  constraint(ALLOC_IN_RC(eadx_reg));
4438  match(reg);
4439
4440  format %{ "EDX:EAX" %}
4441  interface(REG_INTER);
4442%}
4443
4444operand eBCXRegL( eRegL reg ) %{
4445  constraint(ALLOC_IN_RC(ebcx_reg));
4446  match(reg);
4447
4448  format %{ "EBX:ECX" %}
4449  interface(REG_INTER);
4450%}
4451
4452// Special case for integer high multiply
4453operand eADXRegL_low_only() %{
4454  constraint(ALLOC_IN_RC(eadx_reg));
4455  match(RegL);
4456
4457  format %{ "EAX" %}
4458  interface(REG_INTER);
4459%}
4460
4461// Flags register, used as output of compare instructions
4462operand eFlagsReg() %{
4463  constraint(ALLOC_IN_RC(int_flags));
4464  match(RegFlags);
4465
4466  format %{ "EFLAGS" %}
4467  interface(REG_INTER);
4468%}
4469
4470// Flags register, used as output of FLOATING POINT compare instructions
4471operand eFlagsRegU() %{
4472  constraint(ALLOC_IN_RC(int_flags));
4473  match(RegFlags);
4474
4475  format %{ "EFLAGS_U" %}
4476  interface(REG_INTER);
4477%}
4478
4479operand eFlagsRegUCF() %{
4480  constraint(ALLOC_IN_RC(int_flags));
4481  match(RegFlags);
4482  predicate(false);
4483
4484  format %{ "EFLAGS_U_CF" %}
4485  interface(REG_INTER);
4486%}
4487
4488// Condition Code Register used by long compare
4489operand flagsReg_long_LTGE() %{
4490  constraint(ALLOC_IN_RC(int_flags));
4491  match(RegFlags);
4492  format %{ "FLAGS_LTGE" %}
4493  interface(REG_INTER);
4494%}
4495operand flagsReg_long_EQNE() %{
4496  constraint(ALLOC_IN_RC(int_flags));
4497  match(RegFlags);
4498  format %{ "FLAGS_EQNE" %}
4499  interface(REG_INTER);
4500%}
4501operand flagsReg_long_LEGT() %{
4502  constraint(ALLOC_IN_RC(int_flags));
4503  match(RegFlags);
4504  format %{ "FLAGS_LEGT" %}
4505  interface(REG_INTER);
4506%}
4507
4508// Float register operands
4509operand regDPR() %{
4510  predicate( UseSSE < 2 );
4511  constraint(ALLOC_IN_RC(dbl_reg));
4512  match(RegD);
4513  match(regDPR1);
4514  match(regDPR2);
4515  format %{ %}
4516  interface(REG_INTER);
4517%}
4518
4519operand regDPR1(regDPR reg) %{
4520  predicate( UseSSE < 2 );
4521  constraint(ALLOC_IN_RC(dbl_reg0));
4522  match(reg);
4523  format %{ "FPR1" %}
4524  interface(REG_INTER);
4525%}
4526
4527operand regDPR2(regDPR reg) %{
4528  predicate( UseSSE < 2 );
4529  constraint(ALLOC_IN_RC(dbl_reg1));
4530  match(reg);
4531  format %{ "FPR2" %}
4532  interface(REG_INTER);
4533%}
4534
4535operand regnotDPR1(regDPR reg) %{
4536  predicate( UseSSE < 2 );
4537  constraint(ALLOC_IN_RC(dbl_notreg0));
4538  match(reg);
4539  format %{ %}
4540  interface(REG_INTER);
4541%}
4542
4543// XMM Double register operands
4544operand regD() %{
4545  predicate( UseSSE>=2 );
4546  constraint(ALLOC_IN_RC(xdb_reg));
4547  match(RegD);
4548  match(regD6);
4549  match(regD7);
4550  format %{ %}
4551  interface(REG_INTER);
4552%}
4553
4554// XMM6 double register operands
4555operand regD6(regD reg) %{
4556  predicate( UseSSE>=2 );
4557  constraint(ALLOC_IN_RC(xdb_reg6));
4558  match(reg);
4559  format %{ "XMM6" %}
4560  interface(REG_INTER);
4561%}
4562
4563// XMM7 double register operands
4564operand regD7(regD reg) %{
4565  predicate( UseSSE>=2 );
4566  constraint(ALLOC_IN_RC(xdb_reg7));
4567  match(reg);
4568  format %{ "XMM7" %}
4569  interface(REG_INTER);
4570%}
4571
4572// Float register operands
4573operand regFPR() %{
4574  predicate( UseSSE < 2 );
4575  constraint(ALLOC_IN_RC(flt_reg));
4576  match(RegF);
4577  match(regFPR1);
4578  format %{ %}
4579  interface(REG_INTER);
4580%}
4581
4582// Float register operands
4583operand regFPR1(regFPR reg) %{
4584  predicate( UseSSE < 2 );
4585  constraint(ALLOC_IN_RC(flt_reg0));
4586  match(reg);
4587  format %{ "FPR1" %}
4588  interface(REG_INTER);
4589%}
4590
4591// XMM register operands
4592operand regF() %{
4593  predicate( UseSSE>=1 );
4594  constraint(ALLOC_IN_RC(xmm_reg));
4595  match(RegF);
4596  format %{ %}
4597  interface(REG_INTER);
4598%}
4599
4600
4601//----------Memory Operands----------------------------------------------------
4602// Direct Memory Operand
4603operand direct(immP addr) %{
4604  match(addr);
4605
4606  format %{ "[$addr]" %}
4607  interface(MEMORY_INTER) %{
4608    base(0xFFFFFFFF);
4609    index(0x4);
4610    scale(0x0);
4611    disp($addr);
4612  %}
4613%}
4614
4615// Indirect Memory Operand
4616operand indirect(eRegP reg) %{
4617  constraint(ALLOC_IN_RC(e_reg));
4618  match(reg);
4619
4620  format %{ "[$reg]" %}
4621  interface(MEMORY_INTER) %{
4622    base($reg);
4623    index(0x4);
4624    scale(0x0);
4625    disp(0x0);
4626  %}
4627%}
4628
4629// Indirect Memory Plus Short Offset Operand
4630operand indOffset8(eRegP reg, immI8 off) %{
4631  match(AddP reg off);
4632
4633  format %{ "[$reg + $off]" %}
4634  interface(MEMORY_INTER) %{
4635    base($reg);
4636    index(0x4);
4637    scale(0x0);
4638    disp($off);
4639  %}
4640%}
4641
4642// Indirect Memory Plus Long Offset Operand
4643operand indOffset32(eRegP reg, immI off) %{
4644  match(AddP reg off);
4645
4646  format %{ "[$reg + $off]" %}
4647  interface(MEMORY_INTER) %{
4648    base($reg);
4649    index(0x4);
4650    scale(0x0);
4651    disp($off);
4652  %}
4653%}
4654
4655// Indirect Memory Plus Long Offset Operand
4656operand indOffset32X(eRegI reg, immP off) %{
4657  match(AddP off reg);
4658
4659  format %{ "[$reg + $off]" %}
4660  interface(MEMORY_INTER) %{
4661    base($reg);
4662    index(0x4);
4663    scale(0x0);
4664    disp($off);
4665  %}
4666%}
4667
4668// Indirect Memory Plus Index Register Plus Offset Operand
4669operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
4670  match(AddP (AddP reg ireg) off);
4671
4672  op_cost(10);
4673  format %{"[$reg + $off + $ireg]" %}
4674  interface(MEMORY_INTER) %{
4675    base($reg);
4676    index($ireg);
4677    scale(0x0);
4678    disp($off);
4679  %}
4680%}
4681
4682// Indirect Memory Plus Index Register Plus Offset Operand
4683operand indIndex(eRegP reg, eRegI ireg) %{
4684  match(AddP reg ireg);
4685
4686  op_cost(10);
4687  format %{"[$reg + $ireg]" %}
4688  interface(MEMORY_INTER) %{
4689    base($reg);
4690    index($ireg);
4691    scale(0x0);
4692    disp(0x0);
4693  %}
4694%}
4695
4696// // -------------------------------------------------------------------------
4697// // 486 architecture doesn't support "scale * index + offset" with out a base
4698// // -------------------------------------------------------------------------
4699// // Scaled Memory Operands
4700// // Indirect Memory Times Scale Plus Offset Operand
4701// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
4702//   match(AddP off (LShiftI ireg scale));
4703//
4704//   op_cost(10);
4705//   format %{"[$off + $ireg << $scale]" %}
4706//   interface(MEMORY_INTER) %{
4707//     base(0x4);
4708//     index($ireg);
4709//     scale($scale);
4710//     disp($off);
4711//   %}
4712// %}
4713
4714// Indirect Memory Times Scale Plus Index Register
4715operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
4716  match(AddP reg (LShiftI ireg scale));
4717
4718  op_cost(10);
4719  format %{"[$reg + $ireg << $scale]" %}
4720  interface(MEMORY_INTER) %{
4721    base($reg);
4722    index($ireg);
4723    scale($scale);
4724    disp(0x0);
4725  %}
4726%}
4727
4728// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4729operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
4730  match(AddP (AddP reg (LShiftI ireg scale)) off);
4731
4732  op_cost(10);
4733  format %{"[$reg + $off + $ireg << $scale]" %}
4734  interface(MEMORY_INTER) %{
4735    base($reg);
4736    index($ireg);
4737    scale($scale);
4738    disp($off);
4739  %}
4740%}
4741
4742//----------Load Long Memory Operands------------------------------------------
4743// The load-long idiom will use it's address expression again after loading
4744// the first word of the long.  If the load-long destination overlaps with
4745// registers used in the addressing expression, the 2nd half will be loaded
4746// from a clobbered address.  Fix this by requiring that load-long use
4747// address registers that do not overlap with the load-long target.
4748
4749// load-long support
4750operand load_long_RegP() %{
4751  constraint(ALLOC_IN_RC(esi_reg));
4752  match(RegP);
4753  match(eSIRegP);
4754  op_cost(100);
4755  format %{  %}
4756  interface(REG_INTER);
4757%}
4758
4759// Indirect Memory Operand Long
4760operand load_long_indirect(load_long_RegP reg) %{
4761  constraint(ALLOC_IN_RC(esi_reg));
4762  match(reg);
4763
4764  format %{ "[$reg]" %}
4765  interface(MEMORY_INTER) %{
4766    base($reg);
4767    index(0x4);
4768    scale(0x0);
4769    disp(0x0);
4770  %}
4771%}
4772
4773// Indirect Memory Plus Long Offset Operand
4774operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4775  match(AddP reg off);
4776
4777  format %{ "[$reg + $off]" %}
4778  interface(MEMORY_INTER) %{
4779    base($reg);
4780    index(0x4);
4781    scale(0x0);
4782    disp($off);
4783  %}
4784%}
4785
4786opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4787
4788
4789//----------Special Memory Operands--------------------------------------------
4790// Stack Slot Operand - This operand is used for loading and storing temporary
4791//                      values on the stack where a match requires a value to
4792//                      flow through memory.
4793operand stackSlotP(sRegP reg) %{
4794  constraint(ALLOC_IN_RC(stack_slots));
4795  // No match rule because this operand is only generated in matching
4796  format %{ "[$reg]" %}
4797  interface(MEMORY_INTER) %{
4798    base(0x4);   // ESP
4799    index(0x4);  // No Index
4800    scale(0x0);  // No Scale
4801    disp($reg);  // Stack Offset
4802  %}
4803%}
4804
4805operand stackSlotI(sRegI reg) %{
4806  constraint(ALLOC_IN_RC(stack_slots));
4807  // No match rule because this operand is only generated in matching
4808  format %{ "[$reg]" %}
4809  interface(MEMORY_INTER) %{
4810    base(0x4);   // ESP
4811    index(0x4);  // No Index
4812    scale(0x0);  // No Scale
4813    disp($reg);  // Stack Offset
4814  %}
4815%}
4816
4817operand stackSlotF(sRegF reg) %{
4818  constraint(ALLOC_IN_RC(stack_slots));
4819  // No match rule because this operand is only generated in matching
4820  format %{ "[$reg]" %}
4821  interface(MEMORY_INTER) %{
4822    base(0x4);   // ESP
4823    index(0x4);  // No Index
4824    scale(0x0);  // No Scale
4825    disp($reg);  // Stack Offset
4826  %}
4827%}
4828
4829operand stackSlotD(sRegD reg) %{
4830  constraint(ALLOC_IN_RC(stack_slots));
4831  // No match rule because this operand is only generated in matching
4832  format %{ "[$reg]" %}
4833  interface(MEMORY_INTER) %{
4834    base(0x4);   // ESP
4835    index(0x4);  // No Index
4836    scale(0x0);  // No Scale
4837    disp($reg);  // Stack Offset
4838  %}
4839%}
4840
4841operand stackSlotL(sRegL reg) %{
4842  constraint(ALLOC_IN_RC(stack_slots));
4843  // No match rule because this operand is only generated in matching
4844  format %{ "[$reg]" %}
4845  interface(MEMORY_INTER) %{
4846    base(0x4);   // ESP
4847    index(0x4);  // No Index
4848    scale(0x0);  // No Scale
4849    disp($reg);  // Stack Offset
4850  %}
4851%}
4852
4853//----------Memory Operands - Win95 Implicit Null Variants----------------
4854// Indirect Memory Operand
4855operand indirect_win95_safe(eRegP_no_EBP reg)
4856%{
4857  constraint(ALLOC_IN_RC(e_reg));
4858  match(reg);
4859
4860  op_cost(100);
4861  format %{ "[$reg]" %}
4862  interface(MEMORY_INTER) %{
4863    base($reg);
4864    index(0x4);
4865    scale(0x0);
4866    disp(0x0);
4867  %}
4868%}
4869
4870// Indirect Memory Plus Short Offset Operand
4871operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4872%{
4873  match(AddP reg off);
4874
4875  op_cost(100);
4876  format %{ "[$reg + $off]" %}
4877  interface(MEMORY_INTER) %{
4878    base($reg);
4879    index(0x4);
4880    scale(0x0);
4881    disp($off);
4882  %}
4883%}
4884
4885// Indirect Memory Plus Long Offset Operand
4886operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4887%{
4888  match(AddP reg off);
4889
4890  op_cost(100);
4891  format %{ "[$reg + $off]" %}
4892  interface(MEMORY_INTER) %{
4893    base($reg);
4894    index(0x4);
4895    scale(0x0);
4896    disp($off);
4897  %}
4898%}
4899
4900// Indirect Memory Plus Index Register Plus Offset Operand
4901operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
4902%{
4903  match(AddP (AddP reg ireg) off);
4904
4905  op_cost(100);
4906  format %{"[$reg + $off + $ireg]" %}
4907  interface(MEMORY_INTER) %{
4908    base($reg);
4909    index($ireg);
4910    scale(0x0);
4911    disp($off);
4912  %}
4913%}
4914
4915// Indirect Memory Times Scale Plus Index Register
4916operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
4917%{
4918  match(AddP reg (LShiftI ireg scale));
4919
4920  op_cost(100);
4921  format %{"[$reg + $ireg << $scale]" %}
4922  interface(MEMORY_INTER) %{
4923    base($reg);
4924    index($ireg);
4925    scale($scale);
4926    disp(0x0);
4927  %}
4928%}
4929
4930// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4931operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
4932%{
4933  match(AddP (AddP reg (LShiftI ireg scale)) off);
4934
4935  op_cost(100);
4936  format %{"[$reg + $off + $ireg << $scale]" %}
4937  interface(MEMORY_INTER) %{
4938    base($reg);
4939    index($ireg);
4940    scale($scale);
4941    disp($off);
4942  %}
4943%}
4944
4945//----------Conditional Branch Operands----------------------------------------
4946// Comparison Op  - This is the operation of the comparison, and is limited to
4947//                  the following set of codes:
4948//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4949//
4950// Other attributes of the comparison, such as unsignedness, are specified
4951// by the comparison instruction that sets a condition code flags register.
4952// That result is represented by a flags operand whose subtype is appropriate
4953// to the unsignedness (etc.) of the comparison.
4954//
4955// Later, the instruction which matches both the Comparison Op (a Bool) and
4956// the flags (produced by the Cmp) specifies the coding of the comparison op
4957// by matching a specific subtype of Bool operand below, such as cmpOpU.
4958
4959// Comparision Code
4960operand cmpOp() %{
4961  match(Bool);
4962
4963  format %{ "" %}
4964  interface(COND_INTER) %{
4965    equal(0x4, "e");
4966    not_equal(0x5, "ne");
4967    less(0xC, "l");
4968    greater_equal(0xD, "ge");
4969    less_equal(0xE, "le");
4970    greater(0xF, "g");
4971  %}
4972%}
4973
4974// Comparison Code, unsigned compare.  Used by FP also, with
4975// C2 (unordered) turned into GT or LT already.  The other bits
4976// C0 and C3 are turned into Carry & Zero flags.
4977operand cmpOpU() %{
4978  match(Bool);
4979
4980  format %{ "" %}
4981  interface(COND_INTER) %{
4982    equal(0x4, "e");
4983    not_equal(0x5, "ne");
4984    less(0x2, "b");
4985    greater_equal(0x3, "nb");
4986    less_equal(0x6, "be");
4987    greater(0x7, "nbe");
4988  %}
4989%}
4990
4991// Floating comparisons that don't require any fixup for the unordered case
4992operand cmpOpUCF() %{
4993  match(Bool);
4994  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4995            n->as_Bool()->_test._test == BoolTest::ge ||
4996            n->as_Bool()->_test._test == BoolTest::le ||
4997            n->as_Bool()->_test._test == BoolTest::gt);
4998  format %{ "" %}
4999  interface(COND_INTER) %{
5000    equal(0x4, "e");
5001    not_equal(0x5, "ne");
5002    less(0x2, "b");
5003    greater_equal(0x3, "nb");
5004    less_equal(0x6, "be");
5005    greater(0x7, "nbe");
5006  %}
5007%}
5008
5009
5010// Floating comparisons that can be fixed up with extra conditional jumps
5011operand cmpOpUCF2() %{
5012  match(Bool);
5013  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
5014            n->as_Bool()->_test._test == BoolTest::eq);
5015  format %{ "" %}
5016  interface(COND_INTER) %{
5017    equal(0x4, "e");
5018    not_equal(0x5, "ne");
5019    less(0x2, "b");
5020    greater_equal(0x3, "nb");
5021    less_equal(0x6, "be");
5022    greater(0x7, "nbe");
5023  %}
5024%}
5025
5026// Comparison Code for FP conditional move
5027operand cmpOp_fcmov() %{
5028  match(Bool);
5029
5030  format %{ "" %}
5031  interface(COND_INTER) %{
5032    equal        (0x0C8);
5033    not_equal    (0x1C8);
5034    less         (0x0C0);
5035    greater_equal(0x1C0);
5036    less_equal   (0x0D0);
5037    greater      (0x1D0);
5038  %}
5039%}
5040
5041// Comparision Code used in long compares
5042operand cmpOp_commute() %{
5043  match(Bool);
5044
5045  format %{ "" %}
5046  interface(COND_INTER) %{
5047    equal(0x4, "e");
5048    not_equal(0x5, "ne");
5049    less(0xF, "g");
5050    greater_equal(0xE, "le");
5051    less_equal(0xD, "ge");
5052    greater(0xC, "l");
5053  %}
5054%}
5055
5056//----------OPERAND CLASSES----------------------------------------------------
5057// Operand Classes are groups of operands that are used as to simplify
5058// instruction definitions by not requiring the AD writer to specify separate
5059// instructions for every form of operand when the instruction accepts
5060// multiple operand types with the same basic encoding and format.  The classic
5061// case of this is memory operands.
5062
5063opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
5064               indIndex, indIndexScale, indIndexScaleOffset);
5065
5066// Long memory operations are encoded in 2 instructions and a +4 offset.
5067// This means some kind of offset is always required and you cannot use
5068// an oop as the offset (done when working on static globals).
5069opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
5070                    indIndex, indIndexScale, indIndexScaleOffset);
5071
5072
5073//----------PIPELINE-----------------------------------------------------------
5074// Rules which define the behavior of the target architectures pipeline.
5075pipeline %{
5076
5077//----------ATTRIBUTES---------------------------------------------------------
5078attributes %{
5079  variable_size_instructions;        // Fixed size instructions
5080  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
5081  instruction_unit_size = 1;         // An instruction is 1 bytes long
5082  instruction_fetch_unit_size = 16;  // The processor fetches one line
5083  instruction_fetch_units = 1;       // of 16 bytes
5084
5085  // List of nop instructions
5086  nops( MachNop );
5087%}
5088
5089//----------RESOURCES----------------------------------------------------------
5090// Resources are the functional units available to the machine
5091
5092// Generic P2/P3 pipeline
5093// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
5094// 3 instructions decoded per cycle.
5095// 2 load/store ops per cycle, 1 branch, 1 FPU,
5096// 2 ALU op, only ALU0 handles mul/div instructions.
5097resources( D0, D1, D2, DECODE = D0 | D1 | D2,
5098           MS0, MS1, MEM = MS0 | MS1,
5099           BR, FPU,
5100           ALU0, ALU1, ALU = ALU0 | ALU1 );
5101
5102//----------PIPELINE DESCRIPTION-----------------------------------------------
5103// Pipeline Description specifies the stages in the machine's pipeline
5104
5105// Generic P2/P3 pipeline
5106pipe_desc(S0, S1, S2, S3, S4, S5);
5107
5108//----------PIPELINE CLASSES---------------------------------------------------
5109// Pipeline Classes describe the stages in which input and output are
5110// referenced by the hardware pipeline.
5111
5112// Naming convention: ialu or fpu
5113// Then: _reg
5114// Then: _reg if there is a 2nd register
5115// Then: _long if it's a pair of instructions implementing a long
5116// Then: _fat if it requires the big decoder
5117//   Or: _mem if it requires the big decoder and a memory unit.
5118
5119// Integer ALU reg operation
5120pipe_class ialu_reg(eRegI dst) %{
5121    single_instruction;
5122    dst    : S4(write);
5123    dst    : S3(read);
5124    DECODE : S0;        // any decoder
5125    ALU    : S3;        // any alu
5126%}
5127
5128// Long ALU reg operation
5129pipe_class ialu_reg_long(eRegL dst) %{
5130    instruction_count(2);
5131    dst    : S4(write);
5132    dst    : S3(read);
5133    DECODE : S0(2);     // any 2 decoders
5134    ALU    : S3(2);     // both alus
5135%}
5136
5137// Integer ALU reg operation using big decoder
5138pipe_class ialu_reg_fat(eRegI dst) %{
5139    single_instruction;
5140    dst    : S4(write);
5141    dst    : S3(read);
5142    D0     : S0;        // big decoder only
5143    ALU    : S3;        // any alu
5144%}
5145
5146// Long ALU reg operation using big decoder
5147pipe_class ialu_reg_long_fat(eRegL dst) %{
5148    instruction_count(2);
5149    dst    : S4(write);
5150    dst    : S3(read);
5151    D0     : S0(2);     // big decoder only; twice
5152    ALU    : S3(2);     // any 2 alus
5153%}
5154
5155// Integer ALU reg-reg operation
5156pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
5157    single_instruction;
5158    dst    : S4(write);
5159    src    : S3(read);
5160    DECODE : S0;        // any decoder
5161    ALU    : S3;        // any alu
5162%}
5163
5164// Long ALU reg-reg operation
5165pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
5166    instruction_count(2);
5167    dst    : S4(write);
5168    src    : S3(read);
5169    DECODE : S0(2);     // any 2 decoders
5170    ALU    : S3(2);     // both alus
5171%}
5172
5173// Integer ALU reg-reg operation
5174pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
5175    single_instruction;
5176    dst    : S4(write);
5177    src    : S3(read);
5178    D0     : S0;        // big decoder only
5179    ALU    : S3;        // any alu
5180%}
5181
5182// Long ALU reg-reg operation
5183pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
5184    instruction_count(2);
5185    dst    : S4(write);
5186    src    : S3(read);
5187    D0     : S0(2);     // big decoder only; twice
5188    ALU    : S3(2);     // both alus
5189%}
5190
5191// Integer ALU reg-mem operation
5192pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
5193    single_instruction;
5194    dst    : S5(write);
5195    mem    : S3(read);
5196    D0     : S0;        // big decoder only
5197    ALU    : S4;        // any alu
5198    MEM    : S3;        // any mem
5199%}
5200
5201// Long ALU reg-mem operation
5202pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
5203    instruction_count(2);
5204    dst    : S5(write);
5205    mem    : S3(read);
5206    D0     : S0(2);     // big decoder only; twice
5207    ALU    : S4(2);     // any 2 alus
5208    MEM    : S3(2);     // both mems
5209%}
5210
5211// Integer mem operation (prefetch)
5212pipe_class ialu_mem(memory mem)
5213%{
5214    single_instruction;
5215    mem    : S3(read);
5216    D0     : S0;        // big decoder only
5217    MEM    : S3;        // any mem
5218%}
5219
5220// Integer Store to Memory
5221pipe_class ialu_mem_reg(memory mem, eRegI src) %{
5222    single_instruction;
5223    mem    : S3(read);
5224    src    : S5(read);
5225    D0     : S0;        // big decoder only
5226    ALU    : S4;        // any alu
5227    MEM    : S3;
5228%}
5229
5230// Long Store to Memory
5231pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
5232    instruction_count(2);
5233    mem    : S3(read);
5234    src    : S5(read);
5235    D0     : S0(2);     // big decoder only; twice
5236    ALU    : S4(2);     // any 2 alus
5237    MEM    : S3(2);     // Both mems
5238%}
5239
5240// Integer Store to Memory
5241pipe_class ialu_mem_imm(memory mem) %{
5242    single_instruction;
5243    mem    : S3(read);
5244    D0     : S0;        // big decoder only
5245    ALU    : S4;        // any alu
5246    MEM    : S3;
5247%}
5248
5249// Integer ALU0 reg-reg operation
5250pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
5251    single_instruction;
5252    dst    : S4(write);
5253    src    : S3(read);
5254    D0     : S0;        // Big decoder only
5255    ALU0   : S3;        // only alu0
5256%}
5257
5258// Integer ALU0 reg-mem operation
5259pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
5260    single_instruction;
5261    dst    : S5(write);
5262    mem    : S3(read);
5263    D0     : S0;        // big decoder only
5264    ALU0   : S4;        // ALU0 only
5265    MEM    : S3;        // any mem
5266%}
5267
5268// Integer ALU reg-reg operation
5269pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
5270    single_instruction;
5271    cr     : S4(write);
5272    src1   : S3(read);
5273    src2   : S3(read);
5274    DECODE : S0;        // any decoder
5275    ALU    : S3;        // any alu
5276%}
5277
5278// Integer ALU reg-imm operation
5279pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
5280    single_instruction;
5281    cr     : S4(write);
5282    src1   : S3(read);
5283    DECODE : S0;        // any decoder
5284    ALU    : S3;        // any alu
5285%}
5286
5287// Integer ALU reg-mem operation
5288pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
5289    single_instruction;
5290    cr     : S4(write);
5291    src1   : S3(read);
5292    src2   : S3(read);
5293    D0     : S0;        // big decoder only
5294    ALU    : S4;        // any alu
5295    MEM    : S3;
5296%}
5297
5298// Conditional move reg-reg
5299pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
5300    instruction_count(4);
5301    y      : S4(read);
5302    q      : S3(read);
5303    p      : S3(read);
5304    DECODE : S0(4);     // any decoder
5305%}
5306
5307// Conditional move reg-reg
5308pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
5309    single_instruction;
5310    dst    : S4(write);
5311    src    : S3(read);
5312    cr     : S3(read);
5313    DECODE : S0;        // any decoder
5314%}
5315
5316// Conditional move reg-mem
5317pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
5318    single_instruction;
5319    dst    : S4(write);
5320    src    : S3(read);
5321    cr     : S3(read);
5322    DECODE : S0;        // any decoder
5323    MEM    : S3;
5324%}
5325
5326// Conditional move reg-reg long
5327pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
5328    single_instruction;
5329    dst    : S4(write);
5330    src    : S3(read);
5331    cr     : S3(read);
5332    DECODE : S0(2);     // any 2 decoders
5333%}
5334
5335// Conditional move double reg-reg
5336pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
5337    single_instruction;
5338    dst    : S4(write);
5339    src    : S3(read);
5340    cr     : S3(read);
5341    DECODE : S0;        // any decoder
5342%}
5343
5344// Float reg-reg operation
5345pipe_class fpu_reg(regDPR dst) %{
5346    instruction_count(2);
5347    dst    : S3(read);
5348    DECODE : S0(2);     // any 2 decoders
5349    FPU    : S3;
5350%}
5351
5352// Float reg-reg operation
5353pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
5354    instruction_count(2);
5355    dst    : S4(write);
5356    src    : S3(read);
5357    DECODE : S0(2);     // any 2 decoders
5358    FPU    : S3;
5359%}
5360
5361// Float reg-reg operation
5362pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
5363    instruction_count(3);
5364    dst    : S4(write);
5365    src1   : S3(read);
5366    src2   : S3(read);
5367    DECODE : S0(3);     // any 3 decoders
5368    FPU    : S3(2);
5369%}
5370
5371// Float reg-reg operation
5372pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
5373    instruction_count(4);
5374    dst    : S4(write);
5375    src1   : S3(read);
5376    src2   : S3(read);
5377    src3   : S3(read);
5378    DECODE : S0(4);     // any 3 decoders
5379    FPU    : S3(2);
5380%}
5381
5382// Float reg-reg operation
5383pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
5384    instruction_count(4);
5385    dst    : S4(write);
5386    src1   : S3(read);
5387    src2   : S3(read);
5388    src3   : S3(read);
5389    DECODE : S1(3);     // any 3 decoders
5390    D0     : S0;        // Big decoder only
5391    FPU    : S3(2);
5392    MEM    : S3;
5393%}
5394
5395// Float reg-mem operation
5396pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
5397    instruction_count(2);
5398    dst    : S5(write);
5399    mem    : S3(read);
5400    D0     : S0;        // big decoder only
5401    DECODE : S1;        // any decoder for FPU POP
5402    FPU    : S4;
5403    MEM    : S3;        // any mem
5404%}
5405
5406// Float reg-mem operation
5407pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
5408    instruction_count(3);
5409    dst    : S5(write);
5410    src1   : S3(read);
5411    mem    : S3(read);
5412    D0     : S0;        // big decoder only
5413    DECODE : S1(2);     // any decoder for FPU POP
5414    FPU    : S4;
5415    MEM    : S3;        // any mem
5416%}
5417
5418// Float mem-reg operation
5419pipe_class fpu_mem_reg(memory mem, regDPR src) %{
5420    instruction_count(2);
5421    src    : S5(read);
5422    mem    : S3(read);
5423    DECODE : S0;        // any decoder for FPU PUSH
5424    D0     : S1;        // big decoder only
5425    FPU    : S4;
5426    MEM    : S3;        // any mem
5427%}
5428
5429pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
5430    instruction_count(3);
5431    src1   : S3(read);
5432    src2   : S3(read);
5433    mem    : S3(read);
5434    DECODE : S0(2);     // any decoder for FPU PUSH
5435    D0     : S1;        // big decoder only
5436    FPU    : S4;
5437    MEM    : S3;        // any mem
5438%}
5439
5440pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
5441    instruction_count(3);
5442    src1   : S3(read);
5443    src2   : S3(read);
5444    mem    : S4(read);
5445    DECODE : S0;        // any decoder for FPU PUSH
5446    D0     : S0(2);     // big decoder only
5447    FPU    : S4;
5448    MEM    : S3(2);     // any mem
5449%}
5450
5451pipe_class fpu_mem_mem(memory dst, memory src1) %{
5452    instruction_count(2);
5453    src1   : S3(read);
5454    dst    : S4(read);
5455    D0     : S0(2);     // big decoder only
5456    MEM    : S3(2);     // any mem
5457%}
5458
5459pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
5460    instruction_count(3);
5461    src1   : S3(read);
5462    src2   : S3(read);
5463    dst    : S4(read);
5464    D0     : S0(3);     // big decoder only
5465    FPU    : S4;
5466    MEM    : S3(3);     // any mem
5467%}
5468
5469pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
5470    instruction_count(3);
5471    src1   : S4(read);
5472    mem    : S4(read);
5473    DECODE : S0;        // any decoder for FPU PUSH
5474    D0     : S0(2);     // big decoder only
5475    FPU    : S4;
5476    MEM    : S3(2);     // any mem
5477%}
5478
5479// Float load constant
5480pipe_class fpu_reg_con(regDPR dst) %{
5481    instruction_count(2);
5482    dst    : S5(write);
5483    D0     : S0;        // big decoder only for the load
5484    DECODE : S1;        // any decoder for FPU POP
5485    FPU    : S4;
5486    MEM    : S3;        // any mem
5487%}
5488
5489// Float load constant
5490pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5491    instruction_count(3);
5492    dst    : S5(write);
5493    src    : S3(read);
5494    D0     : S0;        // big decoder only for the load
5495    DECODE : S1(2);     // any decoder for FPU POP
5496    FPU    : S4;
5497    MEM    : S3;        // any mem
5498%}
5499
5500// UnConditional branch
5501pipe_class pipe_jmp( label labl ) %{
5502    single_instruction;
5503    BR   : S3;
5504%}
5505
5506// Conditional branch
5507pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5508    single_instruction;
5509    cr    : S1(read);
5510    BR    : S3;
5511%}
5512
5513// Allocation idiom
5514pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5515    instruction_count(1); force_serialization;
5516    fixed_latency(6);
5517    heap_ptr : S3(read);
5518    DECODE   : S0(3);
5519    D0       : S2;
5520    MEM      : S3;
5521    ALU      : S3(2);
5522    dst      : S5(write);
5523    BR       : S5;
5524%}
5525
5526// Generic big/slow expanded idiom
5527pipe_class pipe_slow(  ) %{
5528    instruction_count(10); multiple_bundles; force_serialization;
5529    fixed_latency(100);
5530    D0  : S0(2);
5531    MEM : S3(2);
5532%}
5533
5534// The real do-nothing guy
5535pipe_class empty( ) %{
5536    instruction_count(0);
5537%}
5538
5539// Define the class for the Nop node
5540define %{
5541   MachNop = empty;
5542%}
5543
5544%}
5545
5546//----------INSTRUCTIONS-------------------------------------------------------
5547//
5548// match      -- States which machine-independent subtree may be replaced
5549//               by this instruction.
5550// ins_cost   -- The estimated cost of this instruction is used by instruction
5551//               selection to identify a minimum cost tree of machine
5552//               instructions that matches a tree of machine-independent
5553//               instructions.
5554// format     -- A string providing the disassembly for this instruction.
5555//               The value of an instruction's operand may be inserted
5556//               by referring to it with a '$' prefix.
5557// opcode     -- Three instruction opcodes may be provided.  These are referred
5558//               to within an encode class as $primary, $secondary, and $tertiary
5559//               respectively.  The primary opcode is commonly used to
5560//               indicate the type of machine instruction, while secondary
5561//               and tertiary are often used for prefix options or addressing
5562//               modes.
5563// ins_encode -- A list of encode classes with parameters. The encode class
5564//               name must have been defined in an 'enc_class' specification
5565//               in the encode section of the architecture description.
5566
5567//----------BSWAP-Instruction--------------------------------------------------
5568instruct bytes_reverse_int(eRegI dst) %{
5569  match(Set dst (ReverseBytesI dst));
5570
5571  format %{ "BSWAP  $dst" %}
5572  opcode(0x0F, 0xC8);
5573  ins_encode( OpcP, OpcSReg(dst) );
5574  ins_pipe( ialu_reg );
5575%}
5576
5577instruct bytes_reverse_long(eRegL dst) %{
5578  match(Set dst (ReverseBytesL dst));
5579
5580  format %{ "BSWAP  $dst.lo\n\t"
5581            "BSWAP  $dst.hi\n\t"
5582            "XCHG   $dst.lo $dst.hi" %}
5583
5584  ins_cost(125);
5585  ins_encode( bswap_long_bytes(dst) );
5586  ins_pipe( ialu_reg_reg);
5587%}
5588
5589instruct bytes_reverse_unsigned_short(eRegI dst) %{
5590  match(Set dst (ReverseBytesUS dst));
5591
5592  format %{ "BSWAP  $dst\n\t" 
5593            "SHR    $dst,16\n\t" %}
5594  ins_encode %{
5595    __ bswapl($dst$$Register);
5596    __ shrl($dst$$Register, 16); 
5597  %}
5598  ins_pipe( ialu_reg );
5599%}
5600
5601instruct bytes_reverse_short(eRegI dst) %{
5602  match(Set dst (ReverseBytesS dst));
5603
5604  format %{ "BSWAP  $dst\n\t" 
5605            "SAR    $dst,16\n\t" %}
5606  ins_encode %{
5607    __ bswapl($dst$$Register);
5608    __ sarl($dst$$Register, 16); 
5609  %}
5610  ins_pipe( ialu_reg );
5611%}
5612
5613
5614//---------- Zeros Count Instructions ------------------------------------------
5615
5616instruct countLeadingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
5617  predicate(UseCountLeadingZerosInstruction);
5618  match(Set dst (CountLeadingZerosI src));
5619  effect(KILL cr);
5620
5621  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5622  ins_encode %{
5623    __ lzcntl($dst$$Register, $src$$Register);
5624  %}
5625  ins_pipe(ialu_reg);
5626%}
5627
5628instruct countLeadingZerosI_bsr(eRegI dst, eRegI src, eFlagsReg cr) %{
5629  predicate(!UseCountLeadingZerosInstruction);
5630  match(Set dst (CountLeadingZerosI src));
5631  effect(KILL cr);
5632
5633  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5634            "JNZ    skip\n\t"
5635            "MOV    $dst, -1\n"
5636      "skip:\n\t"
5637            "NEG    $dst\n\t"
5638            "ADD    $dst, 31" %}
5639  ins_encode %{
5640    Register Rdst = $dst$$Register;
5641    Register Rsrc = $src$$Register;
5642    Label skip;
5643    __ bsrl(Rdst, Rsrc);
5644    __ jccb(Assembler::notZero, skip);
5645    __ movl(Rdst, -1);
5646    __ bind(skip);
5647    __ negl(Rdst);
5648    __ addl(Rdst, BitsPerInt - 1);
5649  %}
5650  ins_pipe(ialu_reg);
5651%}
5652
5653instruct countLeadingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
5654  predicate(UseCountLeadingZerosInstruction);
5655  match(Set dst (CountLeadingZerosL src));
5656  effect(TEMP dst, KILL cr);
5657
5658  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5659            "JNC    done\n\t"
5660            "LZCNT  $dst, $src.lo\n\t"
5661            "ADD    $dst, 32\n"
5662      "done:" %}
5663  ins_encode %{
5664    Register Rdst = $dst$$Register;
5665    Register Rsrc = $src$$Register;
5666    Label done;
5667    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5668    __ jccb(Assembler::carryClear, done);
5669    __ lzcntl(Rdst, Rsrc);
5670    __ addl(Rdst, BitsPerInt);
5671    __ bind(done);
5672  %}
5673  ins_pipe(ialu_reg);
5674%}
5675
5676instruct countLeadingZerosL_bsr(eRegI dst, eRegL src, eFlagsReg cr) %{
5677  predicate(!UseCountLeadingZerosInstruction);
5678  match(Set dst (CountLeadingZerosL src));
5679  effect(TEMP dst, KILL cr);
5680
5681  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5682            "JZ     msw_is_zero\n\t"
5683            "ADD    $dst, 32\n\t"
5684            "JMP    not_zero\n"
5685      "msw_is_zero:\n\t"
5686            "BSR    $dst, $src.lo\n\t"
5687            "JNZ    not_zero\n\t"
5688            "MOV    $dst, -1\n"
5689      "not_zero:\n\t"
5690            "NEG    $dst\n\t"
5691            "ADD    $dst, 63\n" %}
5692 ins_encode %{
5693    Register Rdst = $dst$$Register;
5694    Register Rsrc = $src$$Register;
5695    Label msw_is_zero;
5696    Label not_zero;
5697    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5698    __ jccb(Assembler::zero, msw_is_zero);
5699    __ addl(Rdst, BitsPerInt);
5700    __ jmpb(not_zero);
5701    __ bind(msw_is_zero);
5702    __ bsrl(Rdst, Rsrc);
5703    __ jccb(Assembler::notZero, not_zero);
5704    __ movl(Rdst, -1);
5705    __ bind(not_zero);
5706    __ negl(Rdst);
5707    __ addl(Rdst, BitsPerLong - 1);
5708  %}
5709  ins_pipe(ialu_reg);
5710%}
5711
5712instruct countTrailingZerosI(eRegI dst, eRegI src, eFlagsReg cr) %{
5713  match(Set dst (CountTrailingZerosI src));
5714  effect(KILL cr);
5715
5716  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5717            "JNZ    done\n\t"
5718            "MOV    $dst, 32\n"
5719      "done:" %}
5720  ins_encode %{
5721    Register Rdst = $dst$$Register;
5722    Label done;
5723    __ bsfl(Rdst, $src$$Register);
5724    __ jccb(Assembler::notZero, done);
5725    __ movl(Rdst, BitsPerInt);
5726    __ bind(done);
5727  %}
5728  ins_pipe(ialu_reg);
5729%}
5730
5731instruct countTrailingZerosL(eRegI dst, eRegL src, eFlagsReg cr) %{
5732  match(Set dst (CountTrailingZerosL src));
5733  effect(TEMP dst, KILL cr);
5734
5735  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5736            "JNZ    done\n\t"
5737            "BSF    $dst, $src.hi\n\t"
5738            "JNZ    msw_not_zero\n\t"
5739            "MOV    $dst, 32\n"
5740      "msw_not_zero:\n\t"
5741            "ADD    $dst, 32\n"
5742      "done:" %}
5743  ins_encode %{
5744    Register Rdst = $dst$$Register;
5745    Register Rsrc = $src$$Register;
5746    Label msw_not_zero;
5747    Label done;
5748    __ bsfl(Rdst, Rsrc);
5749    __ jccb(Assembler::notZero, done);
5750    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5751    __ jccb(Assembler::notZero, msw_not_zero);
5752    __ movl(Rdst, BitsPerInt);
5753    __ bind(msw_not_zero);
5754    __ addl(Rdst, BitsPerInt);
5755    __ bind(done);
5756  %}
5757  ins_pipe(ialu_reg);
5758%}
5759
5760
5761//---------- Population Count Instructions -------------------------------------
5762
5763instruct popCountI(eRegI dst, eRegI src) %{
5764  predicate(UsePopCountInstruction);
5765  match(Set dst (PopCountI src));
5766
5767  format %{ "POPCNT $dst, $src" %}
5768  ins_encode %{
5769    __ popcntl($dst$$Register, $src$$Register);
5770  %}
5771  ins_pipe(ialu_reg);
5772%}
5773
5774instruct popCountI_mem(eRegI dst, memory mem) %{
5775  predicate(UsePopCountInstruction);
5776  match(Set dst (PopCountI (LoadI mem)));
5777
5778  format %{ "POPCNT $dst, $mem" %}
5779  ins_encode %{
5780    __ popcntl($dst$$Register, $mem$$Address);
5781  %}
5782  ins_pipe(ialu_reg);
5783%}
5784
5785// Note: Long.bitCount(long) returns an int.
5786instruct popCountL(eRegI dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
5787  predicate(UsePopCountInstruction);
5788  match(Set dst (PopCountL src));
5789  effect(KILL cr, TEMP tmp, TEMP dst);
5790
5791  format %{ "POPCNT $dst, $src.lo\n\t"
5792            "POPCNT $tmp, $src.hi\n\t"
5793            "ADD    $dst, $tmp" %}
5794  ins_encode %{
5795    __ popcntl($dst$$Register, $src$$Register);
5796    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5797    __ addl($dst$$Register, $tmp$$Register);
5798  %}
5799  ins_pipe(ialu_reg);
5800%}
5801
5802// Note: Long.bitCount(long) returns an int.
5803instruct popCountL_mem(eRegI dst, memory mem, eRegI tmp, eFlagsReg cr) %{
5804  predicate(UsePopCountInstruction);
5805  match(Set dst (PopCountL (LoadL mem)));
5806  effect(KILL cr, TEMP tmp, TEMP dst);
5807
5808  format %{ "POPCNT $dst, $mem\n\t"
5809            "POPCNT $tmp, $mem+4\n\t"
5810            "ADD    $dst, $tmp" %}
5811  ins_encode %{
5812    //__ popcntl($dst$$Register, $mem$$Address$$first);
5813    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5814    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false));
5815    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false));
5816    __ addl($dst$$Register, $tmp$$Register);
5817  %}
5818  ins_pipe(ialu_reg);
5819%}
5820
5821
5822//----------Load/Store/Move Instructions---------------------------------------
5823//----------Load Instructions--------------------------------------------------
5824// Load Byte (8bit signed)
5825instruct loadB(xRegI dst, memory mem) %{
5826  match(Set dst (LoadB mem));
5827
5828  ins_cost(125);
5829  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5830
5831  ins_encode %{
5832    __ movsbl($dst$$Register, $mem$$Address);
5833  %}
5834
5835  ins_pipe(ialu_reg_mem);
5836%}
5837
5838// Load Byte (8bit signed) into Long Register
5839instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5840  match(Set dst (ConvI2L (LoadB mem)));
5841  effect(KILL cr);
5842
5843  ins_cost(375);
5844  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5845            "MOV    $dst.hi,$dst.lo\n\t"
5846            "SAR    $dst.hi,7" %}
5847
5848  ins_encode %{
5849    __ movsbl($dst$$Register, $mem$$Address);
5850    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5851    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5852  %}
5853
5854  ins_pipe(ialu_reg_mem);
5855%}
5856
5857// Load Unsigned Byte (8bit UNsigned)
5858instruct loadUB(xRegI dst, memory mem) %{
5859  match(Set dst (LoadUB mem));
5860
5861  ins_cost(125);
5862  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5863
5864  ins_encode %{
5865    __ movzbl($dst$$Register, $mem$$Address);
5866  %}
5867
5868  ins_pipe(ialu_reg_mem);
5869%}
5870
5871// Load Unsigned Byte (8 bit UNsigned) into Long Register
5872instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5873  match(Set dst (ConvI2L (LoadUB mem)));
5874  effect(KILL cr);
5875
5876  ins_cost(250);
5877  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5878            "XOR    $dst.hi,$dst.hi" %}
5879
5880  ins_encode %{
5881    Register Rdst = $dst$$Register;
5882    __ movzbl(Rdst, $mem$$Address);
5883    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5884  %}
5885
5886  ins_pipe(ialu_reg_mem);
5887%}
5888
5889// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5890instruct loadUB2L_immI8(eRegL dst, memory mem, immI8 mask, eFlagsReg cr) %{
5891  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5892  effect(KILL cr);
5893
5894  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 8-bit mask -> long\n\t"
5895            "XOR    $dst.hi,$dst.hi\n\t"
5896            "AND    $dst.lo,$mask" %}
5897  ins_encode %{
5898    Register Rdst = $dst$$Register;
5899    __ movzbl(Rdst, $mem$$Address);
5900    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5901    __ andl(Rdst, $mask$$constant);
5902  %}
5903  ins_pipe(ialu_reg_mem);
5904%}
5905
5906// Load Short (16bit signed)
5907instruct loadS(eRegI dst, memory mem) %{
5908  match(Set dst (LoadS mem));
5909
5910  ins_cost(125);
5911  format %{ "MOVSX  $dst,$mem\t# short" %}
5912
5913  ins_encode %{
5914    __ movswl($dst$$Register, $mem$$Address);
5915  %}
5916
5917  ins_pipe(ialu_reg_mem);
5918%}
5919
5920// Load Short (16 bit signed) to Byte (8 bit signed)
5921instruct loadS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
5922  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5923
5924  ins_cost(125);
5925  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5926  ins_encode %{
5927    __ movsbl($dst$$Register, $mem$$Address);
5928  %}
5929  ins_pipe(ialu_reg_mem);
5930%}
5931
5932// Load Short (16bit signed) into Long Register
5933instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5934  match(Set dst (ConvI2L (LoadS mem)));
5935  effect(KILL cr);
5936
5937  ins_cost(375);
5938  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5939            "MOV    $dst.hi,$dst.lo\n\t"
5940            "SAR    $dst.hi,15" %}
5941
5942  ins_encode %{
5943    __ movswl($dst$$Register, $mem$$Address);
5944    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5945    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5946  %}
5947
5948  ins_pipe(ialu_reg_mem);
5949%}
5950
5951// Load Unsigned Short/Char (16bit unsigned)
5952instruct loadUS(eRegI dst, memory mem) %{
5953  match(Set dst (LoadUS mem));
5954
5955  ins_cost(125);
5956  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5957
5958  ins_encode %{
5959    __ movzwl($dst$$Register, $mem$$Address);
5960  %}
5961
5962  ins_pipe(ialu_reg_mem);
5963%}
5964
5965// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5966instruct loadUS2B(eRegI dst, memory mem, immI_24 twentyfour) %{
5967  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5968
5969  ins_cost(125);
5970  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5971  ins_encode %{
5972    __ movsbl($dst$$Register, $mem$$Address);
5973  %}
5974  ins_pipe(ialu_reg_mem);
5975%}
5976
5977// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5978instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5979  match(Set dst (ConvI2L (LoadUS mem)));
5980  effect(KILL cr);
5981
5982  ins_cost(250);
5983  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5984            "XOR    $dst.hi,$dst.hi" %}
5985
5986  ins_encode %{
5987    __ movzwl($dst$$Register, $mem$$Address);
5988    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5989  %}
5990
5991  ins_pipe(ialu_reg_mem);
5992%}
5993
5994// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5995instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5996  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5997  effect(KILL cr);
5998
5999  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
6000            "XOR    $dst.hi,$dst.hi" %}
6001  ins_encode %{
6002    Register Rdst = $dst$$Register;
6003    __ movzbl(Rdst, $mem$$Address);
6004    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6005  %}
6006  ins_pipe(ialu_reg_mem);
6007%}
6008
6009// Load Unsigned Short/Char (16 bit UNsigned) with a 16-bit mask into Long Register
6010instruct loadUS2L_immI16(eRegL dst, memory mem, immI16 mask, eFlagsReg cr) %{
6011  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
6012  effect(KILL cr);
6013
6014  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 16-bit mask -> long\n\t"
6015            "XOR    $dst.hi,$dst.hi\n\t"
6016            "AND    $dst.lo,$mask" %}
6017  ins_encode %{
6018    Register Rdst = $dst$$Register;
6019    __ movzwl(Rdst, $mem$$Address);
6020    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6021    __ andl(Rdst, $mask$$constant);
6022  %}
6023  ins_pipe(ialu_reg_mem);
6024%}
6025
6026// Load Integer
6027instruct loadI(eRegI dst, memory mem) %{
6028  match(Set dst (LoadI mem));
6029
6030  ins_cost(125);
6031  format %{ "MOV    $dst,$mem\t# int" %}
6032
6033  ins_encode %{
6034    __ movl($dst$$Register, $mem$$Address);
6035  %}
6036
6037  ins_pipe(ialu_reg_mem);
6038%}
6039
6040// Load Integer (32 bit signed) to Byte (8 bit signed)
6041instruct loadI2B(eRegI dst, memory mem, immI_24 twentyfour) %{
6042  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
6043
6044  ins_cost(125);
6045  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
6046  ins_encode %{
6047    __ movsbl($dst$$Register, $mem$$Address);
6048  %}
6049  ins_pipe(ialu_reg_mem);
6050%}
6051
6052// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
6053instruct loadI2UB(eRegI dst, memory mem, immI_255 mask) %{
6054  match(Set dst (AndI (LoadI mem) mask));
6055
6056  ins_cost(125);
6057  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
6058  ins_encode %{
6059    __ movzbl($dst$$Register, $mem$$Address);
6060  %}
6061  ins_pipe(ialu_reg_mem);
6062%}
6063
6064// Load Integer (32 bit signed) to Short (16 bit signed)
6065instruct loadI2S(eRegI dst, memory mem, immI_16 sixteen) %{
6066  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
6067
6068  ins_cost(125);
6069  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
6070  ins_encode %{
6071    __ movswl($dst$$Register, $mem$$Address);
6072  %}
6073  ins_pipe(ialu_reg_mem);
6074%}
6075
6076// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
6077instruct loadI2US(eRegI dst, memory mem, immI_65535 mask) %{
6078  match(Set dst (AndI (LoadI mem) mask));
6079
6080  ins_cost(125);
6081  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
6082  ins_encode %{
6083    __ movzwl($dst$$Register, $mem$$Address);
6084  %}
6085  ins_pipe(ialu_reg_mem);
6086%}
6087
6088// Load Integer into Long Register
6089instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6090  match(Set dst (ConvI2L (LoadI mem)));
6091  effect(KILL cr);
6092
6093  ins_cost(375);
6094  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
6095            "MOV    $dst.hi,$dst.lo\n\t"
6096            "SAR    $dst.hi,31" %}
6097
6098  ins_encode %{
6099    __ movl($dst$$Register, $mem$$Address);
6100    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
6101    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
6102  %}
6103
6104  ins_pipe(ialu_reg_mem);
6105%}
6106
6107// Load Integer with mask 0xFF into Long Register
6108instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
6109  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6110  effect(KILL cr);
6111
6112  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
6113            "XOR    $dst.hi,$dst.hi" %}
6114  ins_encode %{
6115    Register Rdst = $dst$$Register;
6116    __ movzbl(Rdst, $mem$$Address);
6117    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6118  %}
6119  ins_pipe(ialu_reg_mem);
6120%}
6121
6122// Load Integer with mask 0xFFFF into Long Register
6123instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
6124  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6125  effect(KILL cr);
6126
6127  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
6128            "XOR    $dst.hi,$dst.hi" %}
6129  ins_encode %{
6130    Register Rdst = $dst$$Register;
6131    __ movzwl(Rdst, $mem$$Address);
6132    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6133  %}
6134  ins_pipe(ialu_reg_mem);
6135%}
6136
6137// Load Integer with 32-bit mask into Long Register
6138instruct loadI2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
6139  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
6140  effect(KILL cr);
6141
6142  format %{ "MOV    $dst.lo,$mem\t# int & 32-bit mask -> long\n\t"
6143            "XOR    $dst.hi,$dst.hi\n\t"
6144            "AND    $dst.lo,$mask" %}
6145  ins_encode %{
6146    Register Rdst = $dst$$Register;
6147    __ movl(Rdst, $mem$$Address);
6148    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
6149    __ andl(Rdst, $mask$$constant);
6150  %}
6151  ins_pipe(ialu_reg_mem);
6152%}
6153
6154// Load Unsigned Integer into Long Register
6155instruct loadUI2L(eRegL dst, memory mem, eFlagsReg cr) %{
6156  match(Set dst (LoadUI2L mem));
6157  effect(KILL cr);
6158
6159  ins_cost(250);
6160  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
6161            "XOR    $dst.hi,$dst.hi" %}
6162
6163  ins_encode %{
6164    __ movl($dst$$Register, $mem$$Address);
6165    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
6166  %}
6167
6168  ins_pipe(ialu_reg_mem);
6169%}
6170
6171// Load Long.  Cannot clobber address while loading, so restrict address
6172// register to ESI
6173instruct loadL(eRegL dst, load_long_memory mem) %{
6174  predicate(!((LoadLNode*)n)->require_atomic_access());
6175  match(Set dst (LoadL mem));
6176
6177  ins_cost(250);
6178  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
6179            "MOV    $dst.hi,$mem+4" %}
6180
6181  ins_encode %{
6182    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, false);
6183    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, false);
6184    __ movl($dst$$Register, Amemlo);
6185    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
6186  %}
6187
6188  ins_pipe(ialu_reg_long_mem);
6189%}
6190
6191// Volatile Load Long.  Must be atomic, so do 64-bit FILD
6192// then store it down to the stack and reload on the int
6193// side.
6194instruct loadL_volatile(stackSlotL dst, memory mem) %{
6195  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
6196  match(Set dst (LoadL mem));
6197
6198  ins_cost(200);
6199  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
6200            "FISTp  $dst" %}
6201  ins_encode(enc_loadL_volatile(mem,dst));
6202  ins_pipe( fpu_reg_mem );
6203%}
6204
6205instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
6206  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6207  match(Set dst (LoadL mem));
6208  effect(TEMP tmp);
6209  ins_cost(180);
6210  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
6211            "MOVSD  $dst,$tmp" %}
6212  ins_encode %{
6213    __ movdbl($tmp$$XMMRegister, $mem$$Address);
6214    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
6215  %}
6216  ins_pipe( pipe_slow );
6217%}
6218
6219instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
6220  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
6221  match(Set dst (LoadL mem));
6222  effect(TEMP tmp);
6223  ins_cost(160);
6224  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
6225            "MOVD   $dst.lo,$tmp\n\t"
6226            "PSRLQ  $tmp,32\n\t"
6227            "MOVD   $dst.hi,$tmp" %}
6228  ins_encode %{
6229    __ movdbl($tmp$$XMMRegister, $mem$$Address);
6230    __ movdl($dst$$Register, $tmp$$XMMRegister);
6231    __ psrlq($tmp$$XMMRegister, 32);
6232    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
6233  %}
6234  ins_pipe( pipe_slow );
6235%}
6236
6237// Load Range
6238instruct loadRange(eRegI dst, memory mem) %{
6239  match(Set dst (LoadRange mem));
6240
6241  ins_cost(125);
6242  format %{ "MOV    $dst,$mem" %}
6243  opcode(0x8B);
6244  ins_encode( OpcP, RegMem(dst,mem));
6245  ins_pipe( ialu_reg_mem );
6246%}
6247
6248
6249// Load Pointer
6250instruct loadP(eRegP dst, memory mem) %{
6251  match(Set dst (LoadP mem));
6252
6253  ins_cost(125);
6254  format %{ "MOV    $dst,$mem" %}
6255  opcode(0x8B);
6256  ins_encode( OpcP, RegMem(dst,mem));
6257  ins_pipe( ialu_reg_mem );
6258%}
6259
6260// Load Klass Pointer
6261instruct loadKlass(eRegP dst, memory mem) %{
6262  match(Set dst (LoadKlass mem));
6263
6264  ins_cost(125);
6265  format %{ "MOV    $dst,$mem" %}
6266  opcode(0x8B);
6267  ins_encode( OpcP, RegMem(dst,mem));
6268  ins_pipe( ialu_reg_mem );
6269%}
6270
6271// Load Double
6272instruct loadDPR(regDPR dst, memory mem) %{
6273  predicate(UseSSE<=1);
6274  match(Set dst (LoadD mem));
6275
6276  ins_cost(150);
6277  format %{ "FLD_D  ST,$mem\n\t"
6278            "FSTP   $dst" %}
6279  opcode(0xDD);               /* DD /0 */
6280  ins_encode( OpcP, RMopc_Mem(0x00,mem),
6281              Pop_Reg_DPR(dst) );
6282  ins_pipe( fpu_reg_mem );
6283%}
6284
6285// Load Double to XMM
6286instruct loadD(regD dst, memory mem) %{
6287  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
6288  match(Set dst (LoadD mem));
6289  ins_cost(145);
6290  format %{ "MOVSD  $dst,$mem" %}
6291  ins_encode %{
6292    __ movdbl ($dst$$XMMRegister, $mem$$Address);
6293  %}
6294  ins_pipe( pipe_slow );
6295%}
6296
6297instruct loadD_partial(regD dst, memory mem) %{
6298  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
6299  match(Set dst (LoadD mem));
6300  ins_cost(145);
6301  format %{ "MOVLPD $dst,$mem" %}
6302  ins_encode %{
6303    __ movdbl ($dst$$XMMRegister, $mem$$Address);
6304  %}
6305  ins_pipe( pipe_slow );
6306%}
6307
6308// Load to XMM register (single-precision floating point)
6309// MOVSS instruction
6310instruct loadF(regF dst, memory mem) %{
6311  predicate(UseSSE>=1);
6312  match(Set dst (LoadF mem));
6313  ins_cost(145);
6314  format %{ "MOVSS  $dst,$mem" %}
6315  ins_encode %{
6316    __ movflt ($dst$$XMMRegister, $mem$$Address);
6317  %}
6318  ins_pipe( pipe_slow );
6319%}
6320
6321// Load Float
6322instruct loadFPR(regFPR dst, memory mem) %{
6323  predicate(UseSSE==0);
6324  match(Set dst (LoadF mem));
6325
6326  ins_cost(150);
6327  format %{ "FLD_S  ST,$mem\n\t"
6328            "FSTP   $dst" %}
6329  opcode(0xD9);               /* D9 /0 */
6330  ins_encode( OpcP, RMopc_Mem(0x00,mem),
6331              Pop_Reg_FPR(dst) );
6332  ins_pipe( fpu_reg_mem );
6333%}
6334
6335// Load Aligned Packed Byte to XMM register
6336instruct loadA8B(regD dst, memory mem) %{
6337  predicate(UseSSE>=1);
6338  match(Set dst (Load8B mem));
6339  ins_cost(125);
6340  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
6341  ins_encode %{
6342    __ movq($dst$$XMMRegister, $mem$$Address);
6343  %}
6344  ins_pipe( pipe_slow );
6345%}
6346
6347// Load Aligned Packed Short to XMM register
6348instruct loadA4S(regD dst, memory mem) %{
6349  predicate(UseSSE>=1);
6350  match(Set dst (Load4S mem));
6351  ins_cost(125);
6352  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
6353  ins_encode %{
6354    __ movq($dst$$XMMRegister, $mem$$Address);
6355  %}
6356  ins_pipe( pipe_slow );
6357%}
6358
6359// Load Aligned Packed Char to XMM register
6360instruct loadA4C(regD dst, memory mem) %{
6361  predicate(UseSSE>=1);
6362  match(Set dst (Load4C mem));
6363  ins_cost(125);
6364  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
6365  ins_encode %{
6366    __ movq($dst$$XMMRegister, $mem$$Address);
6367  %}
6368  ins_pipe( pipe_slow );
6369%}
6370
6371// Load Aligned Packed Integer to XMM register
6372instruct load2IU(regD dst, memory mem) %{
6373  predicate(UseSSE>=1);
6374  match(Set dst (Load2I mem));
6375  ins_cost(125);
6376  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
6377  ins_encode %{
6378    __ movq($dst$$XMMRegister, $mem$$Address);
6379  %}
6380  ins_pipe( pipe_slow );
6381%}
6382
6383// Load Aligned Packed Single to XMM
6384instruct loadA2F(regD dst, memory mem) %{
6385  predicate(UseSSE>=1);
6386  match(Set dst (Load2F mem));
6387  ins_cost(145);
6388  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
6389  ins_encode %{
6390    __ movq($dst$$XMMRegister, $mem$$Address);
6391  %}
6392  ins_pipe( pipe_slow );
6393%}
6394
6395// Load Effective Address
6396instruct leaP8(eRegP dst, indOffset8 mem) %{
6397  match(Set dst mem);
6398
6399  ins_cost(110);
6400  format %{ "LEA    $dst,$mem" %}
6401  opcode(0x8D);
6402  ins_encode( OpcP, RegMem(dst,mem));
6403  ins_pipe( ialu_reg_reg_fat );
6404%}
6405
6406instruct leaP32(eRegP dst, indOffset32 mem) %{
6407  match(Set dst mem);
6408
6409  ins_cost(110);
6410  format %{ "LEA    $dst,$mem" %}
6411  opcode(0x8D);
6412  ins_encode( OpcP, RegMem(dst,mem));
6413  ins_pipe( ialu_reg_reg_fat );
6414%}
6415
6416instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
6417  match(Set dst mem);
6418
6419  ins_cost(110);
6420  format %{ "LEA    $dst,$mem" %}
6421  opcode(0x8D);
6422  ins_encode( OpcP, RegMem(dst,mem));
6423  ins_pipe( ialu_reg_reg_fat );
6424%}
6425
6426instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
6427  match(Set dst mem);
6428
6429  ins_cost(110);
6430  format %{ "LEA    $dst,$mem" %}
6431  opcode(0x8D);
6432  ins_encode( OpcP, RegMem(dst,mem));
6433  ins_pipe( ialu_reg_reg_fat );
6434%}
6435
6436instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
6437  match(Set dst mem);
6438
6439  ins_cost(110);
6440  format %{ "LEA    $dst,$mem" %}
6441  opcode(0x8D);
6442  ins_encode( OpcP, RegMem(dst,mem));
6443  ins_pipe( ialu_reg_reg_fat );
6444%}
6445
6446// Load Constant
6447instruct loadConI(eRegI dst, immI src) %{
6448  match(Set dst src);
6449
6450  format %{ "MOV    $dst,$src" %}
6451  ins_encode( LdImmI(dst, src) );
6452  ins_pipe( ialu_reg_fat );
6453%}
6454
6455// Load Constant zero
6456instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
6457  match(Set dst src);
6458  effect(KILL cr);
6459
6460  ins_cost(50);
6461  format %{ "XOR    $dst,$dst" %}
6462  opcode(0x33);  /* + rd */
6463  ins_encode( OpcP, RegReg( dst, dst ) );
6464  ins_pipe( ialu_reg );
6465%}
6466
6467instruct loadConP(eRegP dst, immP src) %{
6468  match(Set dst src);
6469
6470  format %{ "MOV    $dst,$src" %}
6471  opcode(0xB8);  /* + rd */
6472  ins_encode( LdImmP(dst, src) );
6473  ins_pipe( ialu_reg_fat );
6474%}
6475
6476instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
6477  match(Set dst src);
6478  effect(KILL cr);
6479  ins_cost(200);
6480  format %{ "MOV    $dst.lo,$src.lo\n\t"
6481            "MOV    $dst.hi,$src.hi" %}
6482  opcode(0xB8);
6483  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
6484  ins_pipe( ialu_reg_long_fat );
6485%}
6486
6487instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
6488  match(Set dst src);
6489  effect(KILL cr);
6490  ins_cost(150);
6491  format %{ "XOR    $dst.lo,$dst.lo\n\t"
6492            "XOR    $dst.hi,$dst.hi" %}
6493  opcode(0x33,0x33);
6494  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
6495  ins_pipe( ialu_reg_long );
6496%}
6497
6498// The instruction usage is guarded by predicate in operand immFPR().
6499instruct loadConFPR(regFPR dst, immFPR con) %{
6500  match(Set dst con);
6501  ins_cost(125);
6502  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
6503            "FSTP   $dst" %}
6504  ins_encode %{
6505    __ fld_s($constantaddress($con));
6506    __ fstp_d($dst$$reg);
6507  %}
6508  ins_pipe(fpu_reg_con);
6509%}
6510
6511// The instruction usage is guarded by predicate in operand immFPR0().
6512instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6513  match(Set dst con);
6514  ins_cost(125);
6515  format %{ "FLDZ   ST\n\t"
6516            "FSTP   $dst" %}
6517  ins_encode %{
6518    __ fldz();
6519    __ fstp_d($dst$$reg);
6520  %}
6521  ins_pipe(fpu_reg_con);
6522%}
6523
6524// The instruction usage is guarded by predicate in operand immFPR1().
6525instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6526  match(Set dst con);
6527  ins_cost(125);
6528  format %{ "FLD1   ST\n\t"
6529            "FSTP   $dst" %}
6530  ins_encode %{
6531    __ fld1();
6532    __ fstp_d($dst$$reg);
6533  %}
6534  ins_pipe(fpu_reg_con);
6535%}
6536
6537// The instruction usage is guarded by predicate in operand immF().
6538instruct loadConF(regF dst, immF con) %{
6539  match(Set dst con);
6540  ins_cost(125);
6541  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6542  ins_encode %{
6543    __ movflt($dst$$XMMRegister, $constantaddress($con));
6544  %}
6545  ins_pipe(pipe_slow);
6546%}
6547
6548// The instruction usage is guarded by predicate in operand immF0().
6549instruct loadConF0(regF dst, immF0 src) %{
6550  match(Set dst src);
6551  ins_cost(100);
6552  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6553  ins_encode %{
6554    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6555  %}
6556  ins_pipe(pipe_slow);
6557%}
6558
6559// The instruction usage is guarded by predicate in operand immDPR().
6560instruct loadConDPR(regDPR dst, immDPR con) %{
6561  match(Set dst con);
6562  ins_cost(125);
6563
6564  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6565            "FSTP   $dst" %}
6566  ins_encode %{
6567    __ fld_d($constantaddress($con));
6568    __ fstp_d($dst$$reg);
6569  %}
6570  ins_pipe(fpu_reg_con);
6571%}
6572
6573// The instruction usage is guarded by predicate in operand immDPR0().
6574instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6575  match(Set dst con);
6576  ins_cost(125);
6577
6578  format %{ "FLDZ   ST\n\t"
6579            "FSTP   $dst" %}
6580  ins_encode %{
6581    __ fldz();
6582    __ fstp_d($dst$$reg);
6583  %}
6584  ins_pipe(fpu_reg_con);
6585%}
6586
6587// The instruction usage is guarded by predicate in operand immDPR1().
6588instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6589  match(Set dst con);
6590  ins_cost(125);
6591
6592  format %{ "FLD1   ST\n\t"
6593            "FSTP   $dst" %}
6594  ins_encode %{
6595    __ fld1();
6596    __ fstp_d($dst$$reg);
6597  %}
6598  ins_pipe(fpu_reg_con);
6599%}
6600
6601// The instruction usage is guarded by predicate in operand immD().
6602instruct loadConD(regD dst, immD con) %{
6603  match(Set dst con);
6604  ins_cost(125);
6605  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6606  ins_encode %{
6607    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6608  %}
6609  ins_pipe(pipe_slow);
6610%}
6611
6612// The instruction usage is guarded by predicate in operand immD0().
6613instruct loadConD0(regD dst, immD0 src) %{
6614  match(Set dst src);
6615  ins_cost(100);
6616  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6617  ins_encode %{
6618    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6619  %}
6620  ins_pipe( pipe_slow );
6621%}
6622
6623// Load Stack Slot
6624instruct loadSSI(eRegI dst, stackSlotI src) %{
6625  match(Set dst src);
6626  ins_cost(125);
6627
6628  format %{ "MOV    $dst,$src" %}
6629  opcode(0x8B);
6630  ins_encode( OpcP, RegMem(dst,src));
6631  ins_pipe( ialu_reg_mem );
6632%}
6633
6634instruct loadSSL(eRegL dst, stackSlotL src) %{
6635  match(Set dst src);
6636
6637  ins_cost(200);
6638  format %{ "MOV    $dst,$src.lo\n\t"
6639            "MOV    $dst+4,$src.hi" %}
6640  opcode(0x8B, 0x8B);
6641  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6642  ins_pipe( ialu_mem_long_reg );
6643%}
6644
6645// Load Stack Slot
6646instruct loadSSP(eRegP dst, stackSlotP src) %{
6647  match(Set dst src);
6648  ins_cost(125);
6649
6650  format %{ "MOV    $dst,$src" %}
6651  opcode(0x8B);
6652  ins_encode( OpcP, RegMem(dst,src));
6653  ins_pipe( ialu_reg_mem );
6654%}
6655
6656// Load Stack Slot
6657instruct loadSSF(regFPR dst, stackSlotF src) %{
6658  match(Set dst src);
6659  ins_cost(125);
6660
6661  format %{ "FLD_S  $src\n\t"
6662            "FSTP   $dst" %}
6663  opcode(0xD9);               /* D9 /0, FLD m32real */
6664  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6665              Pop_Reg_FPR(dst) );
6666  ins_pipe( fpu_reg_mem );
6667%}
6668
6669// Load Stack Slot
6670instruct loadSSD(regDPR dst, stackSlotD src) %{
6671  match(Set dst src);
6672  ins_cost(125);
6673
6674  format %{ "FLD_D  $src\n\t"
6675            "FSTP   $dst" %}
6676  opcode(0xDD);               /* DD /0, FLD m64real */
6677  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6678              Pop_Reg_DPR(dst) );
6679  ins_pipe( fpu_reg_mem );
6680%}
6681
6682// Prefetch instructions.
6683// Must be safe to execute with invalid address (cannot fault).
6684
6685instruct prefetchr0( memory mem ) %{
6686  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6687  match(PrefetchRead mem);
6688  ins_cost(0);
6689  size(0);
6690  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
6691  ins_encode();
6692  ins_pipe(empty);
6693%}
6694
6695instruct prefetchr( memory mem ) %{
6696  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch() || ReadPrefetchInstr==3);
6697  match(PrefetchRead mem);
6698  ins_cost(100);
6699
6700  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
6701  ins_encode %{
6702    __ prefetchr($mem$$Address);
6703  %}
6704  ins_pipe(ialu_mem);
6705%}
6706
6707instruct prefetchrNTA( memory mem ) %{
6708  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
6709  match(PrefetchRead mem);
6710  ins_cost(100);
6711
6712  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
6713  ins_encode %{
6714    __ prefetchnta($mem$$Address);
6715  %}
6716  ins_pipe(ialu_mem);
6717%}
6718
6719instruct prefetchrT0( memory mem ) %{
6720  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
6721  match(PrefetchRead mem);
6722  ins_cost(100);
6723
6724  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
6725  ins_encode %{
6726    __ prefetcht0($mem$$Address);
6727  %}
6728  ins_pipe(ialu_mem);
6729%}
6730
6731instruct prefetchrT2( memory mem ) %{
6732  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
6733  match(PrefetchRead mem);
6734  ins_cost(100);
6735
6736  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
6737  ins_encode %{
6738    __ prefetcht2($mem$$Address);
6739  %}
6740  ins_pipe(ialu_mem);
6741%}
6742
6743instruct prefetchw0( memory mem ) %{
6744  predicate(UseSSE==0 && !VM_Version::supports_3dnow_prefetch());
6745  match(PrefetchWrite mem);
6746  ins_cost(0);
6747  size(0);
6748  format %{ "Prefetch (non-SSE is empty encoding)" %}
6749  ins_encode();
6750  ins_pipe(empty);
6751%}
6752
6753instruct prefetchw( memory mem ) %{
6754  predicate(UseSSE==0 && VM_Version::supports_3dnow_prefetch());
6755  match( PrefetchWrite mem );
6756  ins_cost(100);
6757
6758  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
6759  ins_encode %{
6760    __ prefetchw($mem$$Address);
6761  %}
6762  ins_pipe(ialu_mem);
6763%}
6764
6765instruct prefetchwNTA( memory mem ) %{
6766  predicate(UseSSE>=1);
6767  match(PrefetchWrite mem);
6768  ins_cost(100);
6769
6770  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
6771  ins_encode %{
6772    __ prefetchnta($mem$$Address);
6773  %}
6774  ins_pipe(ialu_mem);
6775%}
6776
6777// Prefetch instructions for allocation.
6778
6779instruct prefetchAlloc0( memory mem ) %{
6780  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6781  match(PrefetchAllocation mem);
6782  ins_cost(0);
6783  size(0);
6784  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6785  ins_encode();
6786  ins_pipe(empty);
6787%}
6788
6789instruct prefetchAlloc( memory mem ) %{
6790  predicate(AllocatePrefetchInstr==3);
6791  match( PrefetchAllocation mem );
6792  ins_cost(100);
6793
6794  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6795  ins_encode %{
6796    __ prefetchw($mem$$Address);
6797  %}
6798  ins_pipe(ialu_mem);
6799%}
6800
6801instruct prefetchAllocNTA( memory mem ) %{
6802  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6803  match(PrefetchAllocation mem);
6804  ins_cost(100);
6805
6806  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6807  ins_encode %{
6808    __ prefetchnta($mem$$Address);
6809  %}
6810  ins_pipe(ialu_mem);
6811%}
6812
6813instruct prefetchAllocT0( memory mem ) %{
6814  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6815  match(PrefetchAllocation mem);
6816  ins_cost(100);
6817
6818  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6819  ins_encode %{
6820    __ prefetcht0($mem$$Address);
6821  %}
6822  ins_pipe(ialu_mem);
6823%}
6824
6825instruct prefetchAllocT2( memory mem ) %{
6826  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6827  match(PrefetchAllocation mem);
6828  ins_cost(100);
6829
6830  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6831  ins_encode %{
6832    __ prefetcht2($mem$$Address);
6833  %}
6834  ins_pipe(ialu_mem);
6835%}
6836
6837//----------Store Instructions-------------------------------------------------
6838
6839// Store Byte
6840instruct storeB(memory mem, xRegI src) %{
6841  match(Set mem (StoreB mem src));
6842
6843  ins_cost(125);
6844  format %{ "MOV8   $mem,$src" %}
6845  opcode(0x88);
6846  ins_encode( OpcP, RegMem( src, mem ) );
6847  ins_pipe( ialu_mem_reg );
6848%}
6849
6850// Store Char/Short
6851instruct storeC(memory mem, eRegI src) %{
6852  match(Set mem (StoreC mem src));
6853
6854  ins_cost(125);
6855  format %{ "MOV16  $mem,$src" %}
6856  opcode(0x89, 0x66);
6857  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6858  ins_pipe( ialu_mem_reg );
6859%}
6860
6861// Store Integer
6862instruct storeI(memory mem, eRegI src) %{
6863  match(Set mem (StoreI mem src));
6864
6865  ins_cost(125);
6866  format %{ "MOV    $mem,$src" %}
6867  opcode(0x89);
6868  ins_encode( OpcP, RegMem( src, mem ) );
6869  ins_pipe( ialu_mem_reg );
6870%}
6871
6872// Store Long
6873instruct storeL(long_memory mem, eRegL src) %{
6874  predicate(!((StoreLNode*)n)->require_atomic_access());
6875  match(Set mem (StoreL mem src));
6876
6877  ins_cost(200);
6878  format %{ "MOV    $mem,$src.lo\n\t"
6879            "MOV    $mem+4,$src.hi" %}
6880  opcode(0x89, 0x89);
6881  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6882  ins_pipe( ialu_mem_long_reg );
6883%}
6884
6885// Store Long to Integer
6886instruct storeL2I(memory mem, eRegL src) %{
6887  match(Set mem (StoreI mem (ConvL2I src)));
6888
6889  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6890  ins_encode %{
6891    __ movl($mem$$Address, $src$$Register);
6892  %}
6893  ins_pipe(ialu_mem_reg);
6894%}
6895
6896// Volatile Store Long.  Must be atomic, so move it into
6897// the FP TOS and then do a 64-bit FIST.  Has to probe the
6898// target address before the store (for null-ptr checks)
6899// so the memory operand is used twice in the encoding.
6900instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6901  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6902  match(Set mem (StoreL mem src));
6903  effect( KILL cr );
6904  ins_cost(400);
6905  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6906            "FILD   $src\n\t"
6907            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6908  opcode(0x3B);
6909  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6910  ins_pipe( fpu_reg_mem );
6911%}
6912
6913instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6914  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6915  match(Set mem (StoreL mem src));
6916  effect( TEMP tmp, KILL cr );
6917  ins_cost(380);
6918  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6919            "MOVSD  $tmp,$src\n\t"
6920            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6921  ins_encode %{
6922    __ cmpl(rax, $mem$$Address);
6923    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6924    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6925  %}
6926  ins_pipe( pipe_slow );
6927%}
6928
6929instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6930  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6931  match(Set mem (StoreL mem src));
6932  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6933  ins_cost(360);
6934  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6935            "MOVD   $tmp,$src.lo\n\t"
6936            "MOVD   $tmp2,$src.hi\n\t"
6937            "PUNPCKLDQ $tmp,$tmp2\n\t"
6938            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6939  ins_encode %{
6940    __ cmpl(rax, $mem$$Address);
6941    __ movdl($tmp$$XMMRegister, $src$$Register);
6942    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6943    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6944    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6945  %}
6946  ins_pipe( pipe_slow );
6947%}
6948
6949// Store Pointer; for storing unknown oops and raw pointers
6950instruct storeP(memory mem, anyRegP src) %{
6951  match(Set mem (StoreP mem src));
6952
6953  ins_cost(125);
6954  format %{ "MOV    $mem,$src" %}
6955  opcode(0x89);
6956  ins_encode( OpcP, RegMem( src, mem ) );
6957  ins_pipe( ialu_mem_reg );
6958%}
6959
6960// Store Integer Immediate
6961instruct storeImmI(memory mem, immI src) %{
6962  match(Set mem (StoreI mem src));
6963
6964  ins_cost(150);
6965  format %{ "MOV    $mem,$src" %}
6966  opcode(0xC7);               /* C7 /0 */
6967  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6968  ins_pipe( ialu_mem_imm );
6969%}
6970
6971// Store Short/Char Immediate
6972instruct storeImmI16(memory mem, immI16 src) %{
6973  predicate(UseStoreImmI16);
6974  match(Set mem (StoreC mem src));
6975
6976  ins_cost(150);
6977  format %{ "MOV16  $mem,$src" %}
6978  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6979  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6980  ins_pipe( ialu_mem_imm );
6981%}
6982
6983// Store Pointer Immediate; null pointers or constant oops that do not
6984// need card-mark barriers.
6985instruct storeImmP(memory mem, immP src) %{
6986  match(Set mem (StoreP mem src));
6987
6988  ins_cost(150);
6989  format %{ "MOV    $mem,$src" %}
6990  opcode(0xC7);               /* C7 /0 */
6991  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6992  ins_pipe( ialu_mem_imm );
6993%}
6994
6995// Store Byte Immediate
6996instruct storeImmB(memory mem, immI8 src) %{
6997  match(Set mem (StoreB mem src));
6998
6999  ins_cost(150);
7000  format %{ "MOV8   $mem,$src" %}
7001  opcode(0xC6);               /* C6 /0 */
7002  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7003  ins_pipe( ialu_mem_imm );
7004%}
7005
7006// Store Aligned Packed Byte XMM register to memory
7007instruct storeA8B(memory mem, regD src) %{
7008  predicate(UseSSE>=1);
7009  match(Set mem (Store8B mem src));
7010  ins_cost(145);
7011  format %{ "MOVQ  $mem,$src\t! packed8B" %}
7012  ins_encode %{
7013    __ movq($mem$$Address, $src$$XMMRegister);
7014  %}
7015  ins_pipe( pipe_slow );
7016%}
7017
7018// Store Aligned Packed Char/Short XMM register to memory
7019instruct storeA4C(memory mem, regD src) %{
7020  predicate(UseSSE>=1);
7021  match(Set mem (Store4C mem src));
7022  ins_cost(145);
7023  format %{ "MOVQ  $mem,$src\t! packed4C" %}
7024  ins_encode %{
7025    __ movq($mem$$Address, $src$$XMMRegister);
7026  %}
7027  ins_pipe( pipe_slow );
7028%}
7029
7030// Store Aligned Packed Integer XMM register to memory
7031instruct storeA2I(memory mem, regD src) %{
7032  predicate(UseSSE>=1);
7033  match(Set mem (Store2I mem src));
7034  ins_cost(145);
7035  format %{ "MOVQ  $mem,$src\t! packed2I" %}
7036  ins_encode %{
7037    __ movq($mem$$Address, $src$$XMMRegister);
7038  %}
7039  ins_pipe( pipe_slow );
7040%}
7041
7042// Store CMS card-mark Immediate
7043instruct storeImmCM(memory mem, immI8 src) %{
7044  match(Set mem (StoreCM mem src));
7045
7046  ins_cost(150);
7047  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
7048  opcode(0xC6);               /* C6 /0 */
7049  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
7050  ins_pipe( ialu_mem_imm );
7051%}
7052
7053// Store Double
7054instruct storeDPR( memory mem, regDPR1 src) %{
7055  predicate(UseSSE<=1);
7056  match(Set mem (StoreD mem src));
7057
7058  ins_cost(100);
7059  format %{ "FST_D  $mem,$src" %}
7060  opcode(0xDD);       /* DD /2 */
7061  ins_encode( enc_FPR_store(mem,src) );
7062  ins_pipe( fpu_mem_reg );
7063%}
7064
7065// Store double does rounding on x86
7066instruct storeDPR_rounded( memory mem, regDPR1 src) %{
7067  predicate(UseSSE<=1);
7068  match(Set mem (StoreD mem (RoundDouble src)));
7069
7070  ins_cost(100);
7071  format %{ "FST_D  $mem,$src\t# round" %}
7072  opcode(0xDD);       /* DD /2 */
7073  ins_encode( enc_FPR_store(mem,src) );
7074  ins_pipe( fpu_mem_reg );
7075%}
7076
7077// Store XMM register to memory (double-precision floating points)
7078// MOVSD instruction
7079instruct storeD(memory mem, regD src) %{
7080  predicate(UseSSE>=2);
7081  match(Set mem (StoreD mem src));
7082  ins_cost(95);
7083  format %{ "MOVSD  $mem,$src" %}
7084  ins_encode %{
7085    __ movdbl($mem$$Address, $src$$XMMRegister);
7086  %}
7087  ins_pipe( pipe_slow );
7088%}
7089
7090// Store XMM register to memory (single-precision floating point)
7091// MOVSS instruction
7092instruct storeF(memory mem, regF src) %{
7093  predicate(UseSSE>=1);
7094  match(Set mem (StoreF mem src));
7095  ins_cost(95);
7096  format %{ "MOVSS  $mem,$src" %}
7097  ins_encode %{
7098    __ movflt($mem$$Address, $src$$XMMRegister);
7099  %}
7100  ins_pipe( pipe_slow );
7101%}
7102
7103// Store Aligned Packed Single Float XMM register to memory
7104instruct storeA2F(memory mem, regD src) %{
7105  predicate(UseSSE>=1);
7106  match(Set mem (Store2F mem src));
7107  ins_cost(145);
7108  format %{ "MOVQ  $mem,$src\t! packed2F" %}
7109  ins_encode %{
7110    __ movq($mem$$Address, $src$$XMMRegister);
7111  %}
7112  ins_pipe( pipe_slow );
7113%}
7114
7115// Store Float
7116instruct storeFPR( memory mem, regFPR1 src) %{
7117  predicate(UseSSE==0);
7118  match(Set mem (StoreF mem src));
7119
7120  ins_cost(100);
7121  format %{ "FST_S  $mem,$src" %}
7122  opcode(0xD9);       /* D9 /2 */
7123  ins_encode( enc_FPR_store(mem,src) );
7124  ins_pipe( fpu_mem_reg );
7125%}
7126
7127// Store Float does rounding on x86
7128instruct storeFPR_rounded( memory mem, regFPR1 src) %{
7129  predicate(UseSSE==0);
7130  match(Set mem (StoreF mem (RoundFloat src)));
7131
7132  ins_cost(100);
7133  format %{ "FST_S  $mem,$src\t# round" %}
7134  opcode(0xD9);       /* D9 /2 */
7135  ins_encode( enc_FPR_store(mem,src) );
7136  ins_pipe( fpu_mem_reg );
7137%}
7138
7139// Store Float does rounding on x86
7140instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
7141  predicate(UseSSE<=1);
7142  match(Set mem (StoreF mem (ConvD2F src)));
7143
7144  ins_cost(100);
7145  format %{ "FST_S  $mem,$src\t# D-round" %}
7146  opcode(0xD9);       /* D9 /2 */
7147  ins_encode( enc_FPR_store(mem,src) );
7148  ins_pipe( fpu_mem_reg );
7149%}
7150
7151// Store immediate Float value (it is faster than store from FPU register)
7152// The instruction usage is guarded by predicate in operand immFPR().
7153instruct storeFPR_imm( memory mem, immFPR src) %{
7154  match(Set mem (StoreF mem src));
7155
7156  ins_cost(50);
7157  format %{ "MOV    $mem,$src\t# store float" %}
7158  opcode(0xC7);               /* C7 /0 */
7159  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
7160  ins_pipe( ialu_mem_imm );
7161%}
7162
7163// Store immediate Float value (it is faster than store from XMM register)
7164// The instruction usage is guarded by predicate in operand immF().
7165instruct storeF_imm( memory mem, immF src) %{
7166  match(Set mem (StoreF mem src));
7167
7168  ins_cost(50);
7169  format %{ "MOV    $mem,$src\t# store float" %}
7170  opcode(0xC7);               /* C7 /0 */
7171  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
7172  ins_pipe( ialu_mem_imm );
7173%}
7174
7175// Store Integer to stack slot
7176instruct storeSSI(stackSlotI dst, eRegI src) %{
7177  match(Set dst src);
7178
7179  ins_cost(100);
7180  format %{ "MOV    $dst,$src" %}
7181  opcode(0x89);
7182  ins_encode( OpcPRegSS( dst, src ) );
7183  ins_pipe( ialu_mem_reg );
7184%}
7185
7186// Store Integer to stack slot
7187instruct storeSSP(stackSlotP dst, eRegP src) %{
7188  match(Set dst src);
7189
7190  ins_cost(100);
7191  format %{ "MOV    $dst,$src" %}
7192  opcode(0x89);
7193  ins_encode( OpcPRegSS( dst, src ) );
7194  ins_pipe( ialu_mem_reg );
7195%}
7196
7197// Store Long to stack slot
7198instruct storeSSL(stackSlotL dst, eRegL src) %{
7199  match(Set dst src);
7200
7201  ins_cost(200);
7202  format %{ "MOV    $dst,$src.lo\n\t"
7203            "MOV    $dst+4,$src.hi" %}
7204  opcode(0x89, 0x89);
7205  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
7206  ins_pipe( ialu_mem_long_reg );
7207%}
7208
7209//----------MemBar Instructions-----------------------------------------------
7210// Memory barrier flavors
7211
7212instruct membar_acquire() %{
7213  match(MemBarAcquire);
7214  ins_cost(400);
7215
7216  size(0);
7217  format %{ "MEMBAR-acquire ! (empty encoding)" %}
7218  ins_encode();
7219  ins_pipe(empty);
7220%}
7221
7222instruct membar_acquire_lock() %{
7223  match(MemBarAcquireLock);
7224  ins_cost(0);
7225
7226  size(0);
7227  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
7228  ins_encode( );
7229  ins_pipe(empty);
7230%}
7231
7232instruct membar_release() %{
7233  match(MemBarRelease);
7234  ins_cost(400);
7235
7236  size(0);
7237  format %{ "MEMBAR-release ! (empty encoding)" %}
7238  ins_encode( );
7239  ins_pipe(empty);
7240%}
7241
7242instruct membar_release_lock() %{
7243  match(MemBarReleaseLock);
7244  ins_cost(0);
7245
7246  size(0);
7247  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
7248  ins_encode( );
7249  ins_pipe(empty);
7250%}
7251
7252instruct membar_volatile(eFlagsReg cr) %{
7253  match(MemBarVolatile);
7254  effect(KILL cr);
7255  ins_cost(400);
7256
7257  format %{ 
7258    $$template
7259    if (os::is_MP()) {
7260      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
7261    } else {
7262      $$emit$$"MEMBAR-volatile ! (empty encoding)"
7263    }
7264  %}
7265  ins_encode %{
7266    __ membar(Assembler::StoreLoad);
7267  %}
7268  ins_pipe(pipe_slow);
7269%}
7270
7271instruct unnecessary_membar_volatile() %{
7272  match(MemBarVolatile);
7273  predicate(Matcher::post_store_load_barrier(n));
7274  ins_cost(0);
7275
7276  size(0);
7277  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
7278  ins_encode( );
7279  ins_pipe(empty);
7280%}
7281
7282instruct membar_storestore() %{
7283  match(MemBarStoreStore);
7284  ins_cost(0);
7285
7286  size(0);
7287  format %{ "MEMBAR-storestore (empty encoding)" %}
7288  ins_encode( );
7289  ins_pipe(empty);
7290%}
7291
7292//----------Move Instructions--------------------------------------------------
7293instruct castX2P(eAXRegP dst, eAXRegI src) %{
7294  match(Set dst (CastX2P src));
7295  format %{ "# X2P  $dst, $src" %}
7296  ins_encode( /*empty encoding*/ );
7297  ins_cost(0);
7298  ins_pipe(empty);
7299%}
7300
7301instruct castP2X(eRegI dst, eRegP src ) %{
7302  match(Set dst (CastP2X src));
7303  ins_cost(50);
7304  format %{ "MOV    $dst, $src\t# CastP2X" %}
7305  ins_encode( enc_Copy( dst, src) );
7306  ins_pipe( ialu_reg_reg );
7307%}
7308
7309//----------Conditional Move---------------------------------------------------
7310// Conditional move
7311instruct jmovI_reg(cmpOp cop, eFlagsReg cr, eRegI dst, eRegI src) %{
7312  predicate(!VM_Version::supports_cmov() );
7313  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7314  ins_cost(200);
7315  format %{ "J$cop,us skip\t# signed cmove\n\t"
7316            "MOV    $dst,$src\n"
7317      "skip:" %}
7318  ins_encode %{
7319    Label Lskip;
7320    // Invert sense of branch from sense of CMOV
7321    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7322    __ movl($dst$$Register, $src$$Register);
7323    __ bind(Lskip);
7324  %}
7325  ins_pipe( pipe_cmov_reg );
7326%}
7327
7328instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src) %{
7329  predicate(!VM_Version::supports_cmov() );
7330  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7331  ins_cost(200);
7332  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
7333            "MOV    $dst,$src\n"
7334      "skip:" %}
7335  ins_encode %{
7336    Label Lskip;
7337    // Invert sense of branch from sense of CMOV
7338    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
7339    __ movl($dst$$Register, $src$$Register);
7340    __ bind(Lskip);
7341  %}
7342  ins_pipe( pipe_cmov_reg );
7343%}
7344
7345instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
7346  predicate(VM_Version::supports_cmov() );
7347  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7348  ins_cost(200);
7349  format %{ "CMOV$cop $dst,$src" %}
7350  opcode(0x0F,0x40);
7351  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7352  ins_pipe( pipe_cmov_reg );
7353%}
7354
7355instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
7356  predicate(VM_Version::supports_cmov() );
7357  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7358  ins_cost(200);
7359  format %{ "CMOV$cop $dst,$src" %}
7360  opcode(0x0F,0x40);
7361  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7362  ins_pipe( pipe_cmov_reg );
7363%}
7364
7365instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
7366  predicate(VM_Version::supports_cmov() );
7367  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
7368  ins_cost(200);
7369  expand %{
7370    cmovI_regU(cop, cr, dst, src);
7371  %}
7372%}
7373
7374// Conditional move
7375instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
7376  predicate(VM_Version::supports_cmov() );
7377  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7378  ins_cost(250);
7379  format %{ "CMOV$cop $dst,$src" %}
7380  opcode(0x0F,0x40);
7381  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7382  ins_pipe( pipe_cmov_mem );
7383%}
7384
7385// Conditional move
7386instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
7387  predicate(VM_Version::supports_cmov() );
7388  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7389  ins_cost(250);
7390  format %{ "CMOV$cop $dst,$src" %}
7391  opcode(0x0F,0x40);
7392  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7393  ins_pipe( pipe_cmov_mem );
7394%}
7395
7396instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
7397  predicate(VM_Version::supports_cmov() );
7398  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
7399  ins_cost(250);
7400  expand %{
7401    cmovI_memU(cop, cr, dst, src);
7402  %}
7403%}
7404
7405// Conditional move
7406instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7407  predicate(VM_Version::supports_cmov() );
7408  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7409  ins_cost(200);
7410  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7411  opcode(0x0F,0x40);
7412  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7413  ins_pipe( pipe_cmov_reg );
7414%}
7415
7416// Conditional move (non-P6 version)
7417// Note:  a CMoveP is generated for  stubs and native wrappers
7418//        regardless of whether we are on a P6, so we
7419//        emulate a cmov here
7420instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
7421  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7422  ins_cost(300);
7423  format %{ "Jn$cop   skip\n\t"
7424          "MOV    $dst,$src\t# pointer\n"
7425      "skip:" %}
7426  opcode(0x8b);
7427  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
7428  ins_pipe( pipe_cmov_reg );
7429%}
7430
7431// Conditional move
7432instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
7433  predicate(VM_Version::supports_cmov() );
7434  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7435  ins_cost(200);
7436  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7437  opcode(0x0F,0x40);
7438  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
7439  ins_pipe( pipe_cmov_reg );
7440%}
7441
7442instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
7443  predicate(VM_Version::supports_cmov() );
7444  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
7445  ins_cost(200);
7446  expand %{
7447    cmovP_regU(cop, cr, dst, src);
7448  %}
7449%}
7450
7451// DISABLED: Requires the ADLC to emit a bottom_type call that
7452// correctly meets the two pointer arguments; one is an incoming
7453// register but the other is a memory operand.  ALSO appears to
7454// be buggy with implicit null checks.
7455//
7456//// Conditional move
7457//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
7458//  predicate(VM_Version::supports_cmov() );
7459//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7460//  ins_cost(250);
7461//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7462//  opcode(0x0F,0x40);
7463//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7464//  ins_pipe( pipe_cmov_mem );
7465//%}
7466//
7467//// Conditional move
7468//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
7469//  predicate(VM_Version::supports_cmov() );
7470//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
7471//  ins_cost(250);
7472//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
7473//  opcode(0x0F,0x40);
7474//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
7475//  ins_pipe( pipe_cmov_mem );
7476//%}
7477
7478// Conditional move
7479instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
7480  predicate(UseSSE<=1);
7481  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7482  ins_cost(200);
7483  format %{ "FCMOV$cop $dst,$src\t# double" %}
7484  opcode(0xDA);
7485  ins_encode( enc_cmov_dpr(cop,src) );
7486  ins_pipe( pipe_cmovDPR_reg );
7487%}
7488
7489// Conditional move
7490instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
7491  predicate(UseSSE==0);
7492  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7493  ins_cost(200);
7494  format %{ "FCMOV$cop $dst,$src\t# float" %}
7495  opcode(0xDA);
7496  ins_encode( enc_cmov_dpr(cop,src) );
7497  ins_pipe( pipe_cmovDPR_reg );
7498%}
7499
7500// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7501instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
7502  predicate(UseSSE<=1);
7503  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7504  ins_cost(200);
7505  format %{ "Jn$cop   skip\n\t"
7506            "MOV    $dst,$src\t# double\n"
7507      "skip:" %}
7508  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7509  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
7510  ins_pipe( pipe_cmovDPR_reg );
7511%}
7512
7513// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
7514instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
7515  predicate(UseSSE==0);
7516  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7517  ins_cost(200);
7518  format %{ "Jn$cop    skip\n\t"
7519            "MOV    $dst,$src\t# float\n"
7520      "skip:" %}
7521  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
7522  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
7523  ins_pipe( pipe_cmovDPR_reg );
7524%}
7525
7526// No CMOVE with SSE/SSE2
7527instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
7528  predicate (UseSSE>=1);
7529  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7530  ins_cost(200);
7531  format %{ "Jn$cop   skip\n\t"
7532            "MOVSS  $dst,$src\t# float\n"
7533      "skip:" %}
7534  ins_encode %{
7535    Label skip;
7536    // Invert sense of branch from sense of CMOV
7537    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7538    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7539    __ bind(skip);
7540  %}
7541  ins_pipe( pipe_slow );
7542%}
7543
7544// No CMOVE with SSE/SSE2
7545instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
7546  predicate (UseSSE>=2);
7547  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7548  ins_cost(200);
7549  format %{ "Jn$cop   skip\n\t"
7550            "MOVSD  $dst,$src\t# float\n"
7551      "skip:" %}
7552  ins_encode %{
7553    Label skip;
7554    // Invert sense of branch from sense of CMOV
7555    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7556    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7557    __ bind(skip);
7558  %}
7559  ins_pipe( pipe_slow );
7560%}
7561
7562// unsigned version
7563instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
7564  predicate (UseSSE>=1);
7565  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7566  ins_cost(200);
7567  format %{ "Jn$cop   skip\n\t"
7568            "MOVSS  $dst,$src\t# float\n"
7569      "skip:" %}
7570  ins_encode %{
7571    Label skip;
7572    // Invert sense of branch from sense of CMOV
7573    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7574    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
7575    __ bind(skip);
7576  %}
7577  ins_pipe( pipe_slow );
7578%}
7579
7580instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
7581  predicate (UseSSE>=1);
7582  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
7583  ins_cost(200);
7584  expand %{
7585    fcmovF_regU(cop, cr, dst, src);
7586  %}
7587%}
7588
7589// unsigned version
7590instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
7591  predicate (UseSSE>=2);
7592  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7593  ins_cost(200);
7594  format %{ "Jn$cop   skip\n\t"
7595            "MOVSD  $dst,$src\t# float\n"
7596      "skip:" %}
7597  ins_encode %{
7598    Label skip;
7599    // Invert sense of branch from sense of CMOV
7600    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
7601    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
7602    __ bind(skip);
7603  %}
7604  ins_pipe( pipe_slow );
7605%}
7606
7607instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
7608  predicate (UseSSE>=2);
7609  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
7610  ins_cost(200);
7611  expand %{
7612    fcmovD_regU(cop, cr, dst, src);
7613  %}
7614%}
7615
7616instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
7617  predicate(VM_Version::supports_cmov() );
7618  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7619  ins_cost(200);
7620  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7621            "CMOV$cop $dst.hi,$src.hi" %}
7622  opcode(0x0F,0x40);
7623  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7624  ins_pipe( pipe_cmov_reg_long );
7625%}
7626
7627instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
7628  predicate(VM_Version::supports_cmov() );
7629  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7630  ins_cost(200);
7631  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
7632            "CMOV$cop $dst.hi,$src.hi" %}
7633  opcode(0x0F,0x40);
7634  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
7635  ins_pipe( pipe_cmov_reg_long );
7636%}
7637
7638instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
7639  predicate(VM_Version::supports_cmov() );
7640  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
7641  ins_cost(200);
7642  expand %{
7643    cmovL_regU(cop, cr, dst, src);
7644  %}
7645%}
7646
7647//----------Arithmetic Instructions--------------------------------------------
7648//----------Addition Instructions----------------------------------------------
7649// Integer Addition Instructions
7650instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
7651  match(Set dst (AddI dst src));
7652  effect(KILL cr);
7653
7654  size(2);
7655  format %{ "ADD    $dst,$src" %}
7656  opcode(0x03);
7657  ins_encode( OpcP, RegReg( dst, src) );
7658  ins_pipe( ialu_reg_reg );
7659%}
7660
7661instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
7662  match(Set dst (AddI dst src));
7663  effect(KILL cr);
7664
7665  format %{ "ADD    $dst,$src" %}
7666  opcode(0x81, 0x00); /* /0 id */
7667  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7668  ins_pipe( ialu_reg );
7669%}
7670
7671instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
7672  predicate(UseIncDec);
7673  match(Set dst (AddI dst src));
7674  effect(KILL cr);
7675
7676  size(1);
7677  format %{ "INC    $dst" %}
7678  opcode(0x40); /*  */
7679  ins_encode( Opc_plus( primary, dst ) );
7680  ins_pipe( ialu_reg );
7681%}
7682
7683instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
7684  match(Set dst (AddI src0 src1));
7685  ins_cost(110);
7686
7687  format %{ "LEA    $dst,[$src0 + $src1]" %}
7688  opcode(0x8D); /* 0x8D /r */
7689  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7690  ins_pipe( ialu_reg_reg );
7691%}
7692
7693instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7694  match(Set dst (AddP src0 src1));
7695  ins_cost(110);
7696
7697  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7698  opcode(0x8D); /* 0x8D /r */
7699  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7700  ins_pipe( ialu_reg_reg );
7701%}
7702
7703instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
7704  predicate(UseIncDec);
7705  match(Set dst (AddI dst src));
7706  effect(KILL cr);
7707
7708  size(1);
7709  format %{ "DEC    $dst" %}
7710  opcode(0x48); /*  */
7711  ins_encode( Opc_plus( primary, dst ) );
7712  ins_pipe( ialu_reg );
7713%}
7714
7715instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
7716  match(Set dst (AddP dst src));
7717  effect(KILL cr);
7718
7719  size(2);
7720  format %{ "ADD    $dst,$src" %}
7721  opcode(0x03);
7722  ins_encode( OpcP, RegReg( dst, src) );
7723  ins_pipe( ialu_reg_reg );
7724%}
7725
7726instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7727  match(Set dst (AddP dst src));
7728  effect(KILL cr);
7729
7730  format %{ "ADD    $dst,$src" %}
7731  opcode(0x81,0x00); /* Opcode 81 /0 id */
7732  // ins_encode( RegImm( dst, src) );
7733  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7734  ins_pipe( ialu_reg );
7735%}
7736
7737instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
7738  match(Set dst (AddI dst (LoadI src)));
7739  effect(KILL cr);
7740
7741  ins_cost(125);
7742  format %{ "ADD    $dst,$src" %}
7743  opcode(0x03);
7744  ins_encode( OpcP, RegMem( dst, src) );
7745  ins_pipe( ialu_reg_mem );
7746%}
7747
7748instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
7749  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7750  effect(KILL cr);
7751
7752  ins_cost(150);
7753  format %{ "ADD    $dst,$src" %}
7754  opcode(0x01);  /* Opcode 01 /r */
7755  ins_encode( OpcP, RegMem( src, dst ) );
7756  ins_pipe( ialu_mem_reg );
7757%}
7758
7759// Add Memory with Immediate
7760instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7761  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7762  effect(KILL cr);
7763
7764  ins_cost(125);
7765  format %{ "ADD    $dst,$src" %}
7766  opcode(0x81);               /* Opcode 81 /0 id */
7767  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7768  ins_pipe( ialu_mem_imm );
7769%}
7770
7771instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7772  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7773  effect(KILL cr);
7774
7775  ins_cost(125);
7776  format %{ "INC    $dst" %}
7777  opcode(0xFF);               /* Opcode FF /0 */
7778  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7779  ins_pipe( ialu_mem_imm );
7780%}
7781
7782instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7783  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7784  effect(KILL cr);
7785
7786  ins_cost(125);
7787  format %{ "DEC    $dst" %}
7788  opcode(0xFF);               /* Opcode FF /1 */
7789  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7790  ins_pipe( ialu_mem_imm );
7791%}
7792
7793
7794instruct checkCastPP( eRegP dst ) %{
7795  match(Set dst (CheckCastPP dst));
7796
7797  size(0);
7798  format %{ "#checkcastPP of $dst" %}
7799  ins_encode( /*empty encoding*/ );
7800  ins_pipe( empty );
7801%}
7802
7803instruct castPP( eRegP dst ) %{
7804  match(Set dst (CastPP dst));
7805  format %{ "#castPP of $dst" %}
7806  ins_encode( /*empty encoding*/ );
7807  ins_pipe( empty );
7808%}
7809
7810instruct castII( eRegI dst ) %{
7811  match(Set dst (CastII dst));
7812  format %{ "#castII of $dst" %}
7813  ins_encode( /*empty encoding*/ );
7814  ins_cost(0);
7815  ins_pipe( empty );
7816%}
7817
7818
7819// Load-locked - same as a regular pointer load when used with compare-swap
7820instruct loadPLocked(eRegP dst, memory mem) %{
7821  match(Set dst (LoadPLocked mem));
7822
7823  ins_cost(125);
7824  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7825  opcode(0x8B);
7826  ins_encode( OpcP, RegMem(dst,mem));
7827  ins_pipe( ialu_reg_mem );
7828%}
7829
7830// LoadLong-locked - same as a volatile long load when used with compare-swap
7831instruct loadLLocked(stackSlotL dst, memory mem) %{
7832  predicate(UseSSE<=1);
7833  match(Set dst (LoadLLocked mem));
7834
7835  ins_cost(200);
7836  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
7837            "FISTp  $dst" %}
7838  ins_encode(enc_loadL_volatile(mem,dst));
7839  ins_pipe( fpu_reg_mem );
7840%}
7841
7842instruct loadLX_Locked(stackSlotL dst, memory mem, regD tmp) %{
7843  predicate(UseSSE>=2);
7844  match(Set dst (LoadLLocked mem));
7845  effect(TEMP tmp);
7846  ins_cost(180);
7847  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
7848            "MOVSD  $dst,$tmp" %}
7849  ins_encode %{
7850    __ movdbl($tmp$$XMMRegister, $mem$$Address);
7851    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
7852  %}
7853  ins_pipe( pipe_slow );
7854%}
7855
7856instruct loadLX_reg_Locked(eRegL dst, memory mem, regD tmp) %{
7857  predicate(UseSSE>=2);
7858  match(Set dst (LoadLLocked mem));
7859  effect(TEMP tmp);
7860  ins_cost(160);
7861  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
7862            "MOVD   $dst.lo,$tmp\n\t"
7863            "PSRLQ  $tmp,32\n\t"
7864            "MOVD   $dst.hi,$tmp" %}
7865  ins_encode %{
7866    __ movdbl($tmp$$XMMRegister, $mem$$Address);
7867    __ movdl($dst$$Register, $tmp$$XMMRegister);
7868    __ psrlq($tmp$$XMMRegister, 32);
7869    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
7870  %}
7871  ins_pipe( pipe_slow );
7872%}
7873
7874// Conditional-store of the updated heap-top.
7875// Used during allocation of the shared heap.
7876// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7877instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7878  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7879  // EAX is killed if there is contention, but then it's also unused.
7880  // In the common case of no contention, EAX holds the new oop address.
7881  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7882  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7883  ins_pipe( pipe_cmpxchg );
7884%}
7885
7886// Conditional-store of an int value.
7887// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7888instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
7889  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7890  effect(KILL oldval);
7891  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7892  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7893  ins_pipe( pipe_cmpxchg );
7894%}
7895
7896// Conditional-store of a long value.
7897// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7898instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7899  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7900  effect(KILL oldval);
7901  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7902            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7903            "XCHG   EBX,ECX"
7904  %}
7905  ins_encode %{
7906    // Note: we need to swap rbx, and rcx before and after the
7907    //       cmpxchg8 instruction because the instruction uses
7908    //       rcx as the high order word of the new value to store but
7909    //       our register encoding uses rbx.
7910    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7911    if( os::is_MP() )
7912      __ lock();
7913    __ cmpxchg8($mem$$Address);
7914    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7915  %}
7916  ins_pipe( pipe_cmpxchg );
7917%}
7918
7919// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7920
7921instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7922  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7923  effect(KILL cr, KILL oldval);
7924  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7925            "MOV    $res,0\n\t"
7926            "JNE,s  fail\n\t"
7927            "MOV    $res,1\n"
7928          "fail:" %}
7929  ins_encode( enc_cmpxchg8(mem_ptr),
7930              enc_flags_ne_to_boolean(res) );
7931  ins_pipe( pipe_cmpxchg );
7932%}
7933
7934instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7935  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7936  effect(KILL cr, KILL oldval);
7937  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7938            "MOV    $res,0\n\t"
7939            "JNE,s  fail\n\t"
7940            "MOV    $res,1\n"
7941          "fail:" %}
7942  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7943  ins_pipe( pipe_cmpxchg );
7944%}
7945
7946instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7947  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7948  effect(KILL cr, KILL oldval);
7949  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7950            "MOV    $res,0\n\t"
7951            "JNE,s  fail\n\t"
7952            "MOV    $res,1\n"
7953          "fail:" %}
7954  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7955  ins_pipe( pipe_cmpxchg );
7956%}
7957
7958//----------Subtraction Instructions-------------------------------------------
7959// Integer Subtraction Instructions
7960instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
7961  match(Set dst (SubI dst src));
7962  effect(KILL cr);
7963
7964  size(2);
7965  format %{ "SUB    $dst,$src" %}
7966  opcode(0x2B);
7967  ins_encode( OpcP, RegReg( dst, src) );
7968  ins_pipe( ialu_reg_reg );
7969%}
7970
7971instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
7972  match(Set dst (SubI dst src));
7973  effect(KILL cr);
7974
7975  format %{ "SUB    $dst,$src" %}
7976  opcode(0x81,0x05);  /* Opcode 81 /5 */
7977  // ins_encode( RegImm( dst, src) );
7978  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7979  ins_pipe( ialu_reg );
7980%}
7981
7982instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
7983  match(Set dst (SubI dst (LoadI src)));
7984  effect(KILL cr);
7985
7986  ins_cost(125);
7987  format %{ "SUB    $dst,$src" %}
7988  opcode(0x2B);
7989  ins_encode( OpcP, RegMem( dst, src) );
7990  ins_pipe( ialu_reg_mem );
7991%}
7992
7993instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
7994  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7995  effect(KILL cr);
7996
7997  ins_cost(150);
7998  format %{ "SUB    $dst,$src" %}
7999  opcode(0x29);  /* Opcode 29 /r */
8000  ins_encode( OpcP, RegMem( src, dst ) );
8001  ins_pipe( ialu_mem_reg );
8002%}
8003
8004// Subtract from a pointer
8005instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
8006  match(Set dst (AddP dst (SubI zero src)));
8007  effect(KILL cr);
8008
8009  size(2);
8010  format %{ "SUB    $dst,$src" %}
8011  opcode(0x2B);
8012  ins_encode( OpcP, RegReg( dst, src) );
8013  ins_pipe( ialu_reg_reg );
8014%}
8015
8016instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
8017  match(Set dst (SubI zero dst));
8018  effect(KILL cr);
8019
8020  size(2);
8021  format %{ "NEG    $dst" %}
8022  opcode(0xF7,0x03);  // Opcode F7 /3
8023  ins_encode( OpcP, RegOpc( dst ) );
8024  ins_pipe( ialu_reg );
8025%}
8026
8027
8028//----------Multiplication/Division Instructions-------------------------------
8029// Integer Multiplication Instructions
8030// Multiply Register
8031instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8032  match(Set dst (MulI dst src));
8033  effect(KILL cr);
8034
8035  size(3);
8036  ins_cost(300);
8037  format %{ "IMUL   $dst,$src" %}
8038  opcode(0xAF, 0x0F);
8039  ins_encode( OpcS, OpcP, RegReg( dst, src) );
8040  ins_pipe( ialu_reg_reg_alu0 );
8041%}
8042
8043// Multiply 32-bit Immediate
8044instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
8045  match(Set dst (MulI src imm));
8046  effect(KILL cr);
8047
8048  ins_cost(300);
8049  format %{ "IMUL   $dst,$src,$imm" %}
8050  opcode(0x69);  /* 69 /r id */
8051  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
8052  ins_pipe( ialu_reg_reg_alu0 );
8053%}
8054
8055instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
8056  match(Set dst src);
8057  effect(KILL cr);
8058
8059  // Note that this is artificially increased to make it more expensive than loadConL
8060  ins_cost(250);
8061  format %{ "MOV    EAX,$src\t// low word only" %}
8062  opcode(0xB8);
8063  ins_encode( LdImmL_Lo(dst, src) );
8064  ins_pipe( ialu_reg_fat );
8065%}
8066
8067// Multiply by 32-bit Immediate, taking the shifted high order results
8068//  (special case for shift by 32)
8069instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
8070  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8071  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8072             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8073             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8074  effect(USE src1, KILL cr);
8075
8076  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8077  ins_cost(0*100 + 1*400 - 150);
8078  format %{ "IMUL   EDX:EAX,$src1" %}
8079  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8080  ins_pipe( pipe_slow );
8081%}
8082
8083// Multiply by 32-bit Immediate, taking the shifted high order results
8084instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
8085  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
8086  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
8087             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
8088             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
8089  effect(USE src1, KILL cr);
8090
8091  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
8092  ins_cost(1*100 + 1*400 - 150);
8093  format %{ "IMUL   EDX:EAX,$src1\n\t"
8094            "SAR    EDX,$cnt-32" %}
8095  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
8096  ins_pipe( pipe_slow );
8097%}
8098
8099// Multiply Memory 32-bit Immediate
8100instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
8101  match(Set dst (MulI (LoadI src) imm));
8102  effect(KILL cr);
8103
8104  ins_cost(300);
8105  format %{ "IMUL   $dst,$src,$imm" %}
8106  opcode(0x69);  /* 69 /r id */
8107  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
8108  ins_pipe( ialu_reg_mem_alu0 );
8109%}
8110
8111// Multiply Memory
8112instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
8113  match(Set dst (MulI dst (LoadI src)));
8114  effect(KILL cr);
8115
8116  ins_cost(350);
8117  format %{ "IMUL   $dst,$src" %}
8118  opcode(0xAF, 0x0F);
8119  ins_encode( OpcS, OpcP, RegMem( dst, src) );
8120  ins_pipe( ialu_reg_mem_alu0 );
8121%}
8122
8123// Multiply Register Int to Long
8124instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
8125  // Basic Idea: long = (long)int * (long)int
8126  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
8127  effect(DEF dst, USE src, USE src1, KILL flags);
8128
8129  ins_cost(300);
8130  format %{ "IMUL   $dst,$src1" %}
8131
8132  ins_encode( long_int_multiply( dst, src1 ) );
8133  ins_pipe( ialu_reg_reg_alu0 );
8134%}
8135
8136instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
8137  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
8138  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
8139  effect(KILL flags);
8140
8141  ins_cost(300);
8142  format %{ "MUL    $dst,$src1" %}
8143
8144  ins_encode( long_uint_multiply(dst, src1) );
8145  ins_pipe( ialu_reg_reg_alu0 );
8146%}
8147
8148// Multiply Register Long
8149instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8150  match(Set dst (MulL dst src));
8151  effect(KILL cr, TEMP tmp);
8152  ins_cost(4*100+3*400);
8153// Basic idea: lo(result) = lo(x_lo * y_lo)
8154//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
8155  format %{ "MOV    $tmp,$src.lo\n\t"
8156            "IMUL   $tmp,EDX\n\t"
8157            "MOV    EDX,$src.hi\n\t"
8158            "IMUL   EDX,EAX\n\t"
8159            "ADD    $tmp,EDX\n\t"
8160            "MUL    EDX:EAX,$src.lo\n\t"
8161            "ADD    EDX,$tmp" %}
8162  ins_encode( long_multiply( dst, src, tmp ) );
8163  ins_pipe( pipe_slow );
8164%}
8165
8166// Multiply Register Long where the left operand's high 32 bits are zero
8167instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8168  predicate(is_operand_hi32_zero(n->in(1)));
8169  match(Set dst (MulL dst src));
8170  effect(KILL cr, TEMP tmp);
8171  ins_cost(2*100+2*400);
8172// Basic idea: lo(result) = lo(x_lo * y_lo)
8173//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
8174  format %{ "MOV    $tmp,$src.hi\n\t"
8175            "IMUL   $tmp,EAX\n\t"
8176            "MUL    EDX:EAX,$src.lo\n\t"
8177            "ADD    EDX,$tmp" %}
8178  ins_encode %{
8179    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
8180    __ imull($tmp$$Register, rax);
8181    __ mull($src$$Register);
8182    __ addl(rdx, $tmp$$Register);
8183  %}
8184  ins_pipe( pipe_slow );
8185%}
8186
8187// Multiply Register Long where the right operand's high 32 bits are zero
8188instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
8189  predicate(is_operand_hi32_zero(n->in(2)));
8190  match(Set dst (MulL dst src));
8191  effect(KILL cr, TEMP tmp);
8192  ins_cost(2*100+2*400);
8193// Basic idea: lo(result) = lo(x_lo * y_lo)
8194//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
8195  format %{ "MOV    $tmp,$src.lo\n\t"
8196            "IMUL   $tmp,EDX\n\t"
8197            "MUL    EDX:EAX,$src.lo\n\t"
8198            "ADD    EDX,$tmp" %}
8199  ins_encode %{
8200    __ movl($tmp$$Register, $src$$Register);
8201    __ imull($tmp$$Register, rdx);
8202    __ mull($src$$Register);
8203    __ addl(rdx, $tmp$$Register);
8204  %}
8205  ins_pipe( pipe_slow );
8206%}
8207
8208// Multiply Register Long where the left and the right operands' high 32 bits are zero
8209instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
8210  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
8211  match(Set dst (MulL dst src));
8212  effect(KILL cr);
8213  ins_cost(1*400);
8214// Basic idea: lo(result) = lo(x_lo * y_lo)
8215//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
8216  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
8217  ins_encode %{
8218    __ mull($src$$Register);
8219  %}
8220  ins_pipe( pipe_slow );
8221%}
8222
8223// Multiply Register Long by small constant
8224instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
8225  match(Set dst (MulL dst src));
8226  effect(KILL cr, TEMP tmp);
8227  ins_cost(2*100+2*400);
8228  size(12);
8229// Basic idea: lo(result) = lo(src * EAX)
8230//             hi(result) = hi(src * EAX) + lo(src * EDX)
8231  format %{ "IMUL   $tmp,EDX,$src\n\t"
8232            "MOV    EDX,$src\n\t"
8233            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
8234            "ADD    EDX,$tmp" %}
8235  ins_encode( long_multiply_con( dst, src, tmp ) );
8236  ins_pipe( pipe_slow );
8237%}
8238
8239// Integer DIV with Register
8240instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8241  match(Set rax (DivI rax div));
8242  effect(KILL rdx, KILL cr);
8243  size(26);
8244  ins_cost(30*100+10*100);
8245  format %{ "CMP    EAX,0x80000000\n\t"
8246            "JNE,s  normal\n\t"
8247            "XOR    EDX,EDX\n\t"
8248            "CMP    ECX,-1\n\t"
8249            "JE,s   done\n"
8250    "normal: CDQ\n\t"
8251            "IDIV   $div\n\t"
8252    "done:"        %}
8253  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8254  ins_encode( cdq_enc, OpcP, RegOpc(div) );
8255  ins_pipe( ialu_reg_reg_alu0 );
8256%}
8257
8258// Divide Register Long
8259instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8260  match(Set dst (DivL src1 src2));
8261  effect( KILL cr, KILL cx, KILL bx );
8262  ins_cost(10000);
8263  format %{ "PUSH   $src1.hi\n\t"
8264            "PUSH   $src1.lo\n\t"
8265            "PUSH   $src2.hi\n\t"
8266            "PUSH   $src2.lo\n\t"
8267            "CALL   SharedRuntime::ldiv\n\t"
8268            "ADD    ESP,16" %}
8269  ins_encode( long_div(src1,src2) );
8270  ins_pipe( pipe_slow );
8271%}
8272
8273// Integer DIVMOD with Register, both quotient and mod results
8274instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
8275  match(DivModI rax div);
8276  effect(KILL cr);
8277  size(26);
8278  ins_cost(30*100+10*100);
8279  format %{ "CMP    EAX,0x80000000\n\t"
8280            "JNE,s  normal\n\t"
8281            "XOR    EDX,EDX\n\t"
8282            "CMP    ECX,-1\n\t"
8283            "JE,s   done\n"
8284    "normal: CDQ\n\t"
8285            "IDIV   $div\n\t"
8286    "done:"        %}
8287  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8288  ins_encode( cdq_enc, OpcP, RegOpc(div) );
8289  ins_pipe( pipe_slow );
8290%}
8291
8292// Integer MOD with Register
8293instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
8294  match(Set rdx (ModI rax div));
8295  effect(KILL rax, KILL cr);
8296
8297  size(26);
8298  ins_cost(300);
8299  format %{ "CDQ\n\t"
8300            "IDIV   $div" %}
8301  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
8302  ins_encode( cdq_enc, OpcP, RegOpc(div) );
8303  ins_pipe( ialu_reg_reg_alu0 );
8304%}
8305
8306// Remainder Register Long
8307instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
8308  match(Set dst (ModL src1 src2));
8309  effect( KILL cr, KILL cx, KILL bx );
8310  ins_cost(10000);
8311  format %{ "PUSH   $src1.hi\n\t"
8312            "PUSH   $src1.lo\n\t"
8313            "PUSH   $src2.hi\n\t"
8314            "PUSH   $src2.lo\n\t"
8315            "CALL   SharedRuntime::lrem\n\t"
8316            "ADD    ESP,16" %}
8317  ins_encode( long_mod(src1,src2) );
8318  ins_pipe( pipe_slow );
8319%}
8320
8321// Divide Register Long (no special case since divisor != -1)
8322instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8323  match(Set dst (DivL dst imm));
8324  effect( TEMP tmp, TEMP tmp2, KILL cr );
8325  ins_cost(1000);
8326  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
8327            "XOR    $tmp2,$tmp2\n\t"
8328            "CMP    $tmp,EDX\n\t"
8329            "JA,s   fast\n\t"
8330            "MOV    $tmp2,EAX\n\t"
8331            "MOV    EAX,EDX\n\t"
8332            "MOV    EDX,0\n\t"
8333            "JLE,s  pos\n\t"
8334            "LNEG   EAX : $tmp2\n\t"
8335            "DIV    $tmp # unsigned division\n\t"
8336            "XCHG   EAX,$tmp2\n\t"
8337            "DIV    $tmp\n\t"
8338            "LNEG   $tmp2 : EAX\n\t"
8339            "JMP,s  done\n"
8340    "pos:\n\t"
8341            "DIV    $tmp\n\t"
8342            "XCHG   EAX,$tmp2\n"
8343    "fast:\n\t"
8344            "DIV    $tmp\n"
8345    "done:\n\t"
8346            "MOV    EDX,$tmp2\n\t"
8347            "NEG    EDX:EAX # if $imm < 0" %}
8348  ins_encode %{
8349    int con = (int)$imm$$constant;
8350    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8351    int pcon = (con > 0) ? con : -con;
8352    Label Lfast, Lpos, Ldone;
8353
8354    __ movl($tmp$$Register, pcon);
8355    __ xorl($tmp2$$Register,$tmp2$$Register);
8356    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8357    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
8358
8359    __ movl($tmp2$$Register, $dst$$Register); // save
8360    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8361    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8362    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8363
8364    // Negative dividend.
8365    // convert value to positive to use unsigned division
8366    __ lneg($dst$$Register, $tmp2$$Register);
8367    __ divl($tmp$$Register);
8368    __ xchgl($dst$$Register, $tmp2$$Register);
8369    __ divl($tmp$$Register);
8370    // revert result back to negative
8371    __ lneg($tmp2$$Register, $dst$$Register);
8372    __ jmpb(Ldone);
8373
8374    __ bind(Lpos);
8375    __ divl($tmp$$Register); // Use unsigned division
8376    __ xchgl($dst$$Register, $tmp2$$Register);
8377    // Fallthrow for final divide, tmp2 has 32 bit hi result
8378
8379    __ bind(Lfast);
8380    // fast path: src is positive
8381    __ divl($tmp$$Register); // Use unsigned division
8382
8383    __ bind(Ldone);
8384    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
8385    if (con < 0) {
8386      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
8387    }
8388  %}
8389  ins_pipe( pipe_slow );
8390%}
8391
8392// Remainder Register Long (remainder fit into 32 bits)
8393instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, eRegI tmp, eRegI tmp2, eFlagsReg cr ) %{
8394  match(Set dst (ModL dst imm));
8395  effect( TEMP tmp, TEMP tmp2, KILL cr );
8396  ins_cost(1000);
8397  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
8398            "CMP    $tmp,EDX\n\t"
8399            "JA,s   fast\n\t"
8400            "MOV    $tmp2,EAX\n\t"
8401            "MOV    EAX,EDX\n\t"
8402            "MOV    EDX,0\n\t"
8403            "JLE,s  pos\n\t"
8404            "LNEG   EAX : $tmp2\n\t"
8405            "DIV    $tmp # unsigned division\n\t"
8406            "MOV    EAX,$tmp2\n\t"
8407            "DIV    $tmp\n\t"
8408            "NEG    EDX\n\t"
8409            "JMP,s  done\n"
8410    "pos:\n\t"
8411            "DIV    $tmp\n\t"
8412            "MOV    EAX,$tmp2\n"
8413    "fast:\n\t"
8414            "DIV    $tmp\n"
8415    "done:\n\t"
8416            "MOV    EAX,EDX\n\t"
8417            "SAR    EDX,31\n\t" %}
8418  ins_encode %{
8419    int con = (int)$imm$$constant;
8420    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
8421    int pcon = (con > 0) ? con : -con;
8422    Label  Lfast, Lpos, Ldone;
8423
8424    __ movl($tmp$$Register, pcon);
8425    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
8426    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
8427
8428    __ movl($tmp2$$Register, $dst$$Register); // save
8429    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8430    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
8431    __ jccb(Assembler::lessEqual, Lpos); // result is positive
8432
8433    // Negative dividend.
8434    // convert value to positive to use unsigned division
8435    __ lneg($dst$$Register, $tmp2$$Register);
8436    __ divl($tmp$$Register);
8437    __ movl($dst$$Register, $tmp2$$Register);
8438    __ divl($tmp$$Register);
8439    // revert remainder back to negative
8440    __ negl(HIGH_FROM_LOW($dst$$Register));
8441    __ jmpb(Ldone);
8442
8443    __ bind(Lpos);
8444    __ divl($tmp$$Register);
8445    __ movl($dst$$Register, $tmp2$$Register);
8446
8447    __ bind(Lfast);
8448    // fast path: src is positive
8449    __ divl($tmp$$Register);
8450
8451    __ bind(Ldone);
8452    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
8453    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
8454
8455  %}
8456  ins_pipe( pipe_slow );
8457%}
8458
8459// Integer Shift Instructions
8460// Shift Left by one
8461instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8462  match(Set dst (LShiftI dst shift));
8463  effect(KILL cr);
8464
8465  size(2);
8466  format %{ "SHL    $dst,$shift" %}
8467  opcode(0xD1, 0x4);  /* D1 /4 */
8468  ins_encode( OpcP, RegOpc( dst ) );
8469  ins_pipe( ialu_reg );
8470%}
8471
8472// Shift Left by 8-bit immediate
8473instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8474  match(Set dst (LShiftI dst shift));
8475  effect(KILL cr);
8476
8477  size(3);
8478  format %{ "SHL    $dst,$shift" %}
8479  opcode(0xC1, 0x4);  /* C1 /4 ib */
8480  ins_encode( RegOpcImm( dst, shift) );
8481  ins_pipe( ialu_reg );
8482%}
8483
8484// Shift Left by variable
8485instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8486  match(Set dst (LShiftI dst shift));
8487  effect(KILL cr);
8488
8489  size(2);
8490  format %{ "SHL    $dst,$shift" %}
8491  opcode(0xD3, 0x4);  /* D3 /4 */
8492  ins_encode( OpcP, RegOpc( dst ) );
8493  ins_pipe( ialu_reg_reg );
8494%}
8495
8496// Arithmetic shift right by one
8497instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8498  match(Set dst (RShiftI dst shift));
8499  effect(KILL cr);
8500
8501  size(2);
8502  format %{ "SAR    $dst,$shift" %}
8503  opcode(0xD1, 0x7);  /* D1 /7 */
8504  ins_encode( OpcP, RegOpc( dst ) );
8505  ins_pipe( ialu_reg );
8506%}
8507
8508// Arithmetic shift right by one
8509instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
8510  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8511  effect(KILL cr);
8512  format %{ "SAR    $dst,$shift" %}
8513  opcode(0xD1, 0x7);  /* D1 /7 */
8514  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
8515  ins_pipe( ialu_mem_imm );
8516%}
8517
8518// Arithmetic Shift Right by 8-bit immediate
8519instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8520  match(Set dst (RShiftI dst shift));
8521  effect(KILL cr);
8522
8523  size(3);
8524  format %{ "SAR    $dst,$shift" %}
8525  opcode(0xC1, 0x7);  /* C1 /7 ib */
8526  ins_encode( RegOpcImm( dst, shift ) );
8527  ins_pipe( ialu_mem_imm );
8528%}
8529
8530// Arithmetic Shift Right by 8-bit immediate
8531instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
8532  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
8533  effect(KILL cr);
8534
8535  format %{ "SAR    $dst,$shift" %}
8536  opcode(0xC1, 0x7);  /* C1 /7 ib */
8537  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
8538  ins_pipe( ialu_mem_imm );
8539%}
8540
8541// Arithmetic Shift Right by variable
8542instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8543  match(Set dst (RShiftI dst shift));
8544  effect(KILL cr);
8545
8546  size(2);
8547  format %{ "SAR    $dst,$shift" %}
8548  opcode(0xD3, 0x7);  /* D3 /7 */
8549  ins_encode( OpcP, RegOpc( dst ) );
8550  ins_pipe( ialu_reg_reg );
8551%}
8552
8553// Logical shift right by one
8554instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8555  match(Set dst (URShiftI dst shift));
8556  effect(KILL cr);
8557
8558  size(2);
8559  format %{ "SHR    $dst,$shift" %}
8560  opcode(0xD1, 0x5);  /* D1 /5 */
8561  ins_encode( OpcP, RegOpc( dst ) );
8562  ins_pipe( ialu_reg );
8563%}
8564
8565// Logical Shift Right by 8-bit immediate
8566instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
8567  match(Set dst (URShiftI dst shift));
8568  effect(KILL cr);
8569
8570  size(3);
8571  format %{ "SHR    $dst,$shift" %}
8572  opcode(0xC1, 0x5);  /* C1 /5 ib */
8573  ins_encode( RegOpcImm( dst, shift) );
8574  ins_pipe( ialu_reg );
8575%}
8576
8577
8578// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8579// This idiom is used by the compiler for the i2b bytecode.
8580instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour) %{
8581  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8582
8583  size(3);
8584  format %{ "MOVSX  $dst,$src :8" %}
8585  ins_encode %{
8586    __ movsbl($dst$$Register, $src$$Register);
8587  %}
8588  ins_pipe(ialu_reg_reg);
8589%}
8590
8591// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
8592// This idiom is used by the compiler the i2s bytecode.
8593instruct i2s(eRegI dst, xRegI src, immI_16 sixteen) %{
8594  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
8595
8596  size(3);
8597  format %{ "MOVSX  $dst,$src :16" %}
8598  ins_encode %{
8599    __ movswl($dst$$Register, $src$$Register);
8600  %}
8601  ins_pipe(ialu_reg_reg);
8602%}
8603
8604
8605// Logical Shift Right by variable
8606instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8607  match(Set dst (URShiftI dst shift));
8608  effect(KILL cr);
8609
8610  size(2);
8611  format %{ "SHR    $dst,$shift" %}
8612  opcode(0xD3, 0x5);  /* D3 /5 */
8613  ins_encode( OpcP, RegOpc( dst ) );
8614  ins_pipe( ialu_reg_reg );
8615%}
8616
8617
8618//----------Logical Instructions-----------------------------------------------
8619//----------Integer Logical Instructions---------------------------------------
8620// And Instructions
8621// And Register with Register
8622instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8623  match(Set dst (AndI dst src));
8624  effect(KILL cr);
8625
8626  size(2);
8627  format %{ "AND    $dst,$src" %}
8628  opcode(0x23);
8629  ins_encode( OpcP, RegReg( dst, src) );
8630  ins_pipe( ialu_reg_reg );
8631%}
8632
8633// And Register with Immediate
8634instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8635  match(Set dst (AndI dst src));
8636  effect(KILL cr);
8637
8638  format %{ "AND    $dst,$src" %}
8639  opcode(0x81,0x04);  /* Opcode 81 /4 */
8640  // ins_encode( RegImm( dst, src) );
8641  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8642  ins_pipe( ialu_reg );
8643%}
8644
8645// And Register with Memory
8646instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8647  match(Set dst (AndI dst (LoadI src)));
8648  effect(KILL cr);
8649
8650  ins_cost(125);
8651  format %{ "AND    $dst,$src" %}
8652  opcode(0x23);
8653  ins_encode( OpcP, RegMem( dst, src) );
8654  ins_pipe( ialu_reg_mem );
8655%}
8656
8657// And Memory with Register
8658instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8659  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8660  effect(KILL cr);
8661
8662  ins_cost(150);
8663  format %{ "AND    $dst,$src" %}
8664  opcode(0x21);  /* Opcode 21 /r */
8665  ins_encode( OpcP, RegMem( src, dst ) );
8666  ins_pipe( ialu_mem_reg );
8667%}
8668
8669// And Memory with Immediate
8670instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8671  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8672  effect(KILL cr);
8673
8674  ins_cost(125);
8675  format %{ "AND    $dst,$src" %}
8676  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8677  // ins_encode( MemImm( dst, src) );
8678  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8679  ins_pipe( ialu_mem_imm );
8680%}
8681
8682// Or Instructions
8683// Or Register with Register
8684instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8685  match(Set dst (OrI dst src));
8686  effect(KILL cr);
8687
8688  size(2);
8689  format %{ "OR     $dst,$src" %}
8690  opcode(0x0B);
8691  ins_encode( OpcP, RegReg( dst, src) );
8692  ins_pipe( ialu_reg_reg );
8693%}
8694
8695instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
8696  match(Set dst (OrI dst (CastP2X src)));
8697  effect(KILL cr);
8698
8699  size(2);
8700  format %{ "OR     $dst,$src" %}
8701  opcode(0x0B);
8702  ins_encode( OpcP, RegReg( dst, src) );
8703  ins_pipe( ialu_reg_reg );
8704%}
8705
8706
8707// Or Register with Immediate
8708instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8709  match(Set dst (OrI dst src));
8710  effect(KILL cr);
8711
8712  format %{ "OR     $dst,$src" %}
8713  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8714  // ins_encode( RegImm( dst, src) );
8715  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8716  ins_pipe( ialu_reg );
8717%}
8718
8719// Or Register with Memory
8720instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8721  match(Set dst (OrI dst (LoadI src)));
8722  effect(KILL cr);
8723
8724  ins_cost(125);
8725  format %{ "OR     $dst,$src" %}
8726  opcode(0x0B);
8727  ins_encode( OpcP, RegMem( dst, src) );
8728  ins_pipe( ialu_reg_mem );
8729%}
8730
8731// Or Memory with Register
8732instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8733  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8734  effect(KILL cr);
8735
8736  ins_cost(150);
8737  format %{ "OR     $dst,$src" %}
8738  opcode(0x09);  /* Opcode 09 /r */
8739  ins_encode( OpcP, RegMem( src, dst ) );
8740  ins_pipe( ialu_mem_reg );
8741%}
8742
8743// Or Memory with Immediate
8744instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8745  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8746  effect(KILL cr);
8747
8748  ins_cost(125);
8749  format %{ "OR     $dst,$src" %}
8750  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8751  // ins_encode( MemImm( dst, src) );
8752  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8753  ins_pipe( ialu_mem_imm );
8754%}
8755
8756// ROL/ROR
8757// ROL expand
8758instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8759  effect(USE_DEF dst, USE shift, KILL cr);
8760
8761  format %{ "ROL    $dst, $shift" %}
8762  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8763  ins_encode( OpcP, RegOpc( dst ));
8764  ins_pipe( ialu_reg );
8765%}
8766
8767instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
8768  effect(USE_DEF dst, USE shift, KILL cr);
8769
8770  format %{ "ROL    $dst, $shift" %}
8771  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8772  ins_encode( RegOpcImm(dst, shift) );
8773  ins_pipe(ialu_reg);
8774%}
8775
8776instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8777  effect(USE_DEF dst, USE shift, KILL cr);
8778
8779  format %{ "ROL    $dst, $shift" %}
8780  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8781  ins_encode(OpcP, RegOpc(dst));
8782  ins_pipe( ialu_reg_reg );
8783%}
8784// end of ROL expand
8785
8786// ROL 32bit by one once
8787instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8788  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8789
8790  expand %{
8791    rolI_eReg_imm1(dst, lshift, cr);
8792  %}
8793%}
8794
8795// ROL 32bit var by imm8 once
8796instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8797  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8798  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8799
8800  expand %{
8801    rolI_eReg_imm8(dst, lshift, cr);
8802  %}
8803%}
8804
8805// ROL 32bit var by var once
8806instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8807  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8808
8809  expand %{
8810    rolI_eReg_CL(dst, shift, cr);
8811  %}
8812%}
8813
8814// ROL 32bit var by var once
8815instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8816  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8817
8818  expand %{
8819    rolI_eReg_CL(dst, shift, cr);
8820  %}
8821%}
8822
8823// ROR expand
8824instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
8825  effect(USE_DEF dst, USE shift, KILL cr);
8826
8827  format %{ "ROR    $dst, $shift" %}
8828  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8829  ins_encode( OpcP, RegOpc( dst ) );
8830  ins_pipe( ialu_reg );
8831%}
8832
8833instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
8834  effect (USE_DEF dst, USE shift, KILL cr);
8835
8836  format %{ "ROR    $dst, $shift" %}
8837  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8838  ins_encode( RegOpcImm(dst, shift) );
8839  ins_pipe( ialu_reg );
8840%}
8841
8842instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8843  effect(USE_DEF dst, USE shift, KILL cr);
8844
8845  format %{ "ROR    $dst, $shift" %}
8846  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8847  ins_encode(OpcP, RegOpc(dst));
8848  ins_pipe( ialu_reg_reg );
8849%}
8850// end of ROR expand
8851
8852// ROR right once
8853instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8854  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8855
8856  expand %{
8857    rorI_eReg_imm1(dst, rshift, cr);
8858  %}
8859%}
8860
8861// ROR 32bit by immI8 once
8862instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8863  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8864  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8865
8866  expand %{
8867    rorI_eReg_imm8(dst, rshift, cr);
8868  %}
8869%}
8870
8871// ROR 32bit var by var once
8872instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8873  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8874
8875  expand %{
8876    rorI_eReg_CL(dst, shift, cr);
8877  %}
8878%}
8879
8880// ROR 32bit var by var once
8881instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8882  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8883
8884  expand %{
8885    rorI_eReg_CL(dst, shift, cr);
8886  %}
8887%}
8888
8889// Xor Instructions
8890// Xor Register with Register
8891instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8892  match(Set dst (XorI dst src));
8893  effect(KILL cr);
8894
8895  size(2);
8896  format %{ "XOR    $dst,$src" %}
8897  opcode(0x33);
8898  ins_encode( OpcP, RegReg( dst, src) );
8899  ins_pipe( ialu_reg_reg );
8900%}
8901
8902// Xor Register with Immediate -1
8903instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
8904  match(Set dst (XorI dst imm));  
8905
8906  size(2);
8907  format %{ "NOT    $dst" %}  
8908  ins_encode %{
8909     __ notl($dst$$Register);
8910  %}
8911  ins_pipe( ialu_reg );
8912%}
8913
8914// Xor Register with Immediate
8915instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8916  match(Set dst (XorI dst src));
8917  effect(KILL cr);
8918
8919  format %{ "XOR    $dst,$src" %}
8920  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8921  // ins_encode( RegImm( dst, src) );
8922  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8923  ins_pipe( ialu_reg );
8924%}
8925
8926// Xor Register with Memory
8927instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
8928  match(Set dst (XorI dst (LoadI src)));
8929  effect(KILL cr);
8930
8931  ins_cost(125);
8932  format %{ "XOR    $dst,$src" %}
8933  opcode(0x33);
8934  ins_encode( OpcP, RegMem(dst, src) );
8935  ins_pipe( ialu_reg_mem );
8936%}
8937
8938// Xor Memory with Register
8939instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
8940  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8941  effect(KILL cr);
8942
8943  ins_cost(150);
8944  format %{ "XOR    $dst,$src" %}
8945  opcode(0x31);  /* Opcode 31 /r */
8946  ins_encode( OpcP, RegMem( src, dst ) );
8947  ins_pipe( ialu_mem_reg );
8948%}
8949
8950// Xor Memory with Immediate
8951instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8952  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8953  effect(KILL cr);
8954
8955  ins_cost(125);
8956  format %{ "XOR    $dst,$src" %}
8957  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8958  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8959  ins_pipe( ialu_mem_imm );
8960%}
8961
8962//----------Convert Int to Boolean---------------------------------------------
8963
8964instruct movI_nocopy(eRegI dst, eRegI src) %{
8965  effect( DEF dst, USE src );
8966  format %{ "MOV    $dst,$src" %}
8967  ins_encode( enc_Copy( dst, src) );
8968  ins_pipe( ialu_reg_reg );
8969%}
8970
8971instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
8972  effect( USE_DEF dst, USE src, KILL cr );
8973
8974  size(4);
8975  format %{ "NEG    $dst\n\t"
8976            "ADC    $dst,$src" %}
8977  ins_encode( neg_reg(dst),
8978              OpcRegReg(0x13,dst,src) );
8979  ins_pipe( ialu_reg_reg_long );
8980%}
8981
8982instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
8983  match(Set dst (Conv2B src));
8984
8985  expand %{
8986    movI_nocopy(dst,src);
8987    ci2b(dst,src,cr);
8988  %}
8989%}
8990
8991instruct movP_nocopy(eRegI dst, eRegP src) %{
8992  effect( DEF dst, USE src );
8993  format %{ "MOV    $dst,$src" %}
8994  ins_encode( enc_Copy( dst, src) );
8995  ins_pipe( ialu_reg_reg );
8996%}
8997
8998instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
8999  effect( USE_DEF dst, USE src, KILL cr );
9000  format %{ "NEG    $dst\n\t"
9001            "ADC    $dst,$src" %}
9002  ins_encode( neg_reg(dst),
9003              OpcRegReg(0x13,dst,src) );
9004  ins_pipe( ialu_reg_reg_long );
9005%}
9006
9007instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
9008  match(Set dst (Conv2B src));
9009
9010  expand %{
9011    movP_nocopy(dst,src);
9012    cp2b(dst,src,cr);
9013  %}
9014%}
9015
9016instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
9017  match(Set dst (CmpLTMask p q));
9018  effect( KILL cr );
9019  ins_cost(400);
9020
9021  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
9022  format %{ "XOR    $dst,$dst\n\t"
9023            "CMP    $p,$q\n\t"
9024            "SETlt  $dst\n\t"
9025            "NEG    $dst" %}
9026  ins_encode( OpcRegReg(0x33,dst,dst),
9027              OpcRegReg(0x3B,p,q),
9028              setLT_reg(dst), neg_reg(dst) );
9029  ins_pipe( pipe_slow );
9030%}
9031
9032instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
9033  match(Set dst (CmpLTMask dst zero));
9034  effect( DEF dst, KILL cr );
9035  ins_cost(100);
9036
9037  format %{ "SAR    $dst,31" %}
9038  opcode(0xC1, 0x7);  /* C1 /7 ib */
9039  ins_encode( RegOpcImm( dst, 0x1F ) );
9040  ins_pipe( ialu_reg );
9041%}
9042
9043
9044instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
9045  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
9046  effect( KILL tmp, KILL cr );
9047  ins_cost(400);
9048  // annoyingly, $tmp has no edges so you cant ask for it in
9049  // any format or encoding
9050  format %{ "SUB    $p,$q\n\t"
9051            "SBB    ECX,ECX\n\t"
9052            "AND    ECX,$y\n\t"
9053            "ADD    $p,ECX" %}
9054  ins_encode( enc_cmpLTP(p,q,y,tmp) );
9055  ins_pipe( pipe_cmplt );
9056%}
9057
9058/* If I enable this, I encourage spilling in the inner loop of compress.
9059instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
9060  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
9061  effect( USE_KILL tmp, KILL cr );
9062  ins_cost(400);
9063
9064  format %{ "SUB    $p,$q\n\t"
9065            "SBB    ECX,ECX\n\t"
9066            "AND    ECX,$y\n\t"
9067            "ADD    $p,ECX" %}
9068  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
9069%}
9070*/
9071
9072//----------Long Instructions------------------------------------------------
9073// Add Long Register with Register
9074instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9075  match(Set dst (AddL dst src));
9076  effect(KILL cr);
9077  ins_cost(200);
9078  format %{ "ADD    $dst.lo,$src.lo\n\t"
9079            "ADC    $dst.hi,$src.hi" %}
9080  opcode(0x03, 0x13);
9081  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9082  ins_pipe( ialu_reg_reg_long );
9083%}
9084
9085// Add Long Register with Immediate
9086instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9087  match(Set dst (AddL dst src));
9088  effect(KILL cr);
9089  format %{ "ADD    $dst.lo,$src.lo\n\t"
9090            "ADC    $dst.hi,$src.hi" %}
9091  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
9092  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9093  ins_pipe( ialu_reg_long );
9094%}
9095
9096// Add Long Register with Memory
9097instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9098  match(Set dst (AddL dst (LoadL mem)));
9099  effect(KILL cr);
9100  ins_cost(125);
9101  format %{ "ADD    $dst.lo,$mem\n\t"
9102            "ADC    $dst.hi,$mem+4" %}
9103  opcode(0x03, 0x13);
9104  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9105  ins_pipe( ialu_reg_long_mem );
9106%}
9107
9108// Subtract Long Register with Register.
9109instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9110  match(Set dst (SubL dst src));
9111  effect(KILL cr);
9112  ins_cost(200);
9113  format %{ "SUB    $dst.lo,$src.lo\n\t"
9114            "SBB    $dst.hi,$src.hi" %}
9115  opcode(0x2B, 0x1B);
9116  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
9117  ins_pipe( ialu_reg_reg_long );
9118%}
9119
9120// Subtract Long Register with Immediate
9121instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9122  match(Set dst (SubL dst src));
9123  effect(KILL cr);
9124  format %{ "SUB    $dst.lo,$src.lo\n\t"
9125            "SBB    $dst.hi,$src.hi" %}
9126  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
9127  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9128  ins_pipe( ialu_reg_long );
9129%}
9130
9131// Subtract Long Register with Memory
9132instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9133  match(Set dst (SubL dst (LoadL mem)));
9134  effect(KILL cr);
9135  ins_cost(125);
9136  format %{ "SUB    $dst.lo,$mem\n\t"
9137            "SBB    $dst.hi,$mem+4" %}
9138  opcode(0x2B, 0x1B);
9139  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9140  ins_pipe( ialu_reg_long_mem );
9141%}
9142
9143instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
9144  match(Set dst (SubL zero dst));
9145  effect(KILL cr);
9146  ins_cost(300);
9147  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
9148  ins_encode( neg_long(dst) );
9149  ins_pipe( ialu_reg_reg_long );
9150%}
9151
9152// And Long Register with Register
9153instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9154  match(Set dst (AndL dst src));
9155  effect(KILL cr);
9156  format %{ "AND    $dst.lo,$src.lo\n\t"
9157            "AND    $dst.hi,$src.hi" %}
9158  opcode(0x23,0x23);
9159  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9160  ins_pipe( ialu_reg_reg_long );
9161%}
9162
9163// And Long Register with Immediate
9164instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9165  match(Set dst (AndL dst src));
9166  effect(KILL cr);
9167  format %{ "AND    $dst.lo,$src.lo\n\t"
9168            "AND    $dst.hi,$src.hi" %}
9169  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
9170  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9171  ins_pipe( ialu_reg_long );
9172%}
9173
9174// And Long Register with Memory
9175instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9176  match(Set dst (AndL dst (LoadL mem)));
9177  effect(KILL cr);
9178  ins_cost(125);
9179  format %{ "AND    $dst.lo,$mem\n\t"
9180            "AND    $dst.hi,$mem+4" %}
9181  opcode(0x23, 0x23);
9182  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9183  ins_pipe( ialu_reg_long_mem );
9184%}
9185
9186// Or Long Register with Register
9187instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9188  match(Set dst (OrL dst src));
9189  effect(KILL cr);
9190  format %{ "OR     $dst.lo,$src.lo\n\t"
9191            "OR     $dst.hi,$src.hi" %}
9192  opcode(0x0B,0x0B);
9193  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9194  ins_pipe( ialu_reg_reg_long );
9195%}
9196
9197// Or Long Register with Immediate
9198instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9199  match(Set dst (OrL dst src));
9200  effect(KILL cr);
9201  format %{ "OR     $dst.lo,$src.lo\n\t"
9202            "OR     $dst.hi,$src.hi" %}
9203  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
9204  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9205  ins_pipe( ialu_reg_long );
9206%}
9207
9208// Or Long Register with Memory
9209instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9210  match(Set dst (OrL dst (LoadL mem)));
9211  effect(KILL cr);
9212  ins_cost(125);
9213  format %{ "OR     $dst.lo,$mem\n\t"
9214            "OR     $dst.hi,$mem+4" %}
9215  opcode(0x0B,0x0B);
9216  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9217  ins_pipe( ialu_reg_long_mem );
9218%}
9219
9220// Xor Long Register with Register
9221instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9222  match(Set dst (XorL dst src));
9223  effect(KILL cr);
9224  format %{ "XOR    $dst.lo,$src.lo\n\t"
9225            "XOR    $dst.hi,$src.hi" %}
9226  opcode(0x33,0x33);
9227  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9228  ins_pipe( ialu_reg_reg_long );
9229%}
9230
9231// Xor Long Register with Immediate -1
9232instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9233  match(Set dst (XorL dst imm));  
9234  format %{ "NOT    $dst.lo\n\t"
9235            "NOT    $dst.hi" %}
9236  ins_encode %{
9237     __ notl($dst$$Register);
9238     __ notl(HIGH_FROM_LOW($dst$$Register));
9239  %}
9240  ins_pipe( ialu_reg_long );
9241%}
9242
9243// Xor Long Register with Immediate
9244instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9245  match(Set dst (XorL dst src));
9246  effect(KILL cr);
9247  format %{ "XOR    $dst.lo,$src.lo\n\t"
9248            "XOR    $dst.hi,$src.hi" %}
9249  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9250  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9251  ins_pipe( ialu_reg_long );
9252%}
9253
9254// Xor Long Register with Memory
9255instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9256  match(Set dst (XorL dst (LoadL mem)));
9257  effect(KILL cr);
9258  ins_cost(125);
9259  format %{ "XOR    $dst.lo,$mem\n\t"
9260            "XOR    $dst.hi,$mem+4" %}
9261  opcode(0x33,0x33);
9262  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9263  ins_pipe( ialu_reg_long_mem );
9264%}
9265
9266// Shift Left Long by 1
9267instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9268  predicate(UseNewLongLShift);
9269  match(Set dst (LShiftL dst cnt));
9270  effect(KILL cr);
9271  ins_cost(100);
9272  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9273            "ADC    $dst.hi,$dst.hi" %}
9274  ins_encode %{
9275    __ addl($dst$$Register,$dst$$Register);
9276    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9277  %}
9278  ins_pipe( ialu_reg_long );
9279%}
9280
9281// Shift Left Long by 2
9282instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9283  predicate(UseNewLongLShift);
9284  match(Set dst (LShiftL dst cnt));
9285  effect(KILL cr);
9286  ins_cost(100);
9287  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9288            "ADC    $dst.hi,$dst.hi\n\t" 
9289            "ADD    $dst.lo,$dst.lo\n\t"
9290            "ADC    $dst.hi,$dst.hi" %}
9291  ins_encode %{
9292    __ addl($dst$$Register,$dst$$Register);
9293    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9294    __ addl($dst$$Register,$dst$$Register);
9295    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9296  %}
9297  ins_pipe( ialu_reg_long );
9298%}
9299
9300// Shift Left Long by 3
9301instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9302  predicate(UseNewLongLShift);
9303  match(Set dst (LShiftL dst cnt));
9304  effect(KILL cr);
9305  ins_cost(100);
9306  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9307            "ADC    $dst.hi,$dst.hi\n\t" 
9308            "ADD    $dst.lo,$dst.lo\n\t"
9309            "ADC    $dst.hi,$dst.hi\n\t" 
9310            "ADD    $dst.lo,$dst.lo\n\t"
9311            "ADC    $dst.hi,$dst.hi" %}
9312  ins_encode %{
9313    __ addl($dst$$Register,$dst$$Register);
9314    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9315    __ addl($dst$$Register,$dst$$Register);
9316    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9317    __ addl($dst$$Register,$dst$$Register);
9318    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9319  %}
9320  ins_pipe( ialu_reg_long );
9321%}
9322
9323// Shift Left Long by 1-31
9324instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9325  match(Set dst (LShiftL dst cnt));
9326  effect(KILL cr);
9327  ins_cost(200);
9328  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9329            "SHL    $dst.lo,$cnt" %}
9330  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9331  ins_encode( move_long_small_shift(dst,cnt) );
9332  ins_pipe( ialu_reg_long );
9333%}
9334
9335// Shift Left Long by 32-63
9336instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9337  match(Set dst (LShiftL dst cnt));
9338  effect(KILL cr);
9339  ins_cost(300);
9340  format %{ "MOV    $dst.hi,$dst.lo\n"
9341          "\tSHL    $dst.hi,$cnt-32\n"
9342          "\tXOR    $dst.lo,$dst.lo" %}
9343  opcode(0xC1, 0x4);  /* C1 /4 ib */
9344  ins_encode( move_long_big_shift_clr(dst,cnt) );
9345  ins_pipe( ialu_reg_long );
9346%}
9347
9348// Shift Left Long by variable
9349instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9350  match(Set dst (LShiftL dst shift));
9351  effect(KILL cr);
9352  ins_cost(500+200);
9353  size(17);
9354  format %{ "TEST   $shift,32\n\t"
9355            "JEQ,s  small\n\t"
9356            "MOV    $dst.hi,$dst.lo\n\t"
9357            "XOR    $dst.lo,$dst.lo\n"
9358    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9359            "SHL    $dst.lo,$shift" %}
9360  ins_encode( shift_left_long( dst, shift ) );
9361  ins_pipe( pipe_slow );
9362%}
9363
9364// Shift Right Long by 1-31
9365instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9366  match(Set dst (URShiftL dst cnt));
9367  effect(KILL cr);
9368  ins_cost(200);
9369  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9370            "SHR    $dst.hi,$cnt" %}
9371  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9372  ins_encode( move_long_small_shift(dst,cnt) );
9373  ins_pipe( ialu_reg_long );
9374%}
9375
9376// Shift Right Long by 32-63
9377instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9378  match(Set dst (URShiftL dst cnt));
9379  effect(KILL cr);
9380  ins_cost(300);
9381  format %{ "MOV    $dst.lo,$dst.hi\n"
9382          "\tSHR    $dst.lo,$cnt-32\n"
9383          "\tXOR    $dst.hi,$dst.hi" %}
9384  opcode(0xC1, 0x5);  /* C1 /5 ib */
9385  ins_encode( move_long_big_shift_clr(dst,cnt) );
9386  ins_pipe( ialu_reg_long );
9387%}
9388
9389// Shift Right Long by variable
9390instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9391  match(Set dst (URShiftL dst shift));
9392  effect(KILL cr);
9393  ins_cost(600);
9394  size(17);
9395  format %{ "TEST   $shift,32\n\t"
9396            "JEQ,s  small\n\t"
9397            "MOV    $dst.lo,$dst.hi\n\t"
9398            "XOR    $dst.hi,$dst.hi\n"
9399    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9400            "SHR    $dst.hi,$shift" %}
9401  ins_encode( shift_right_long( dst, shift ) );
9402  ins_pipe( pipe_slow );
9403%}
9404
9405// Shift Right Long by 1-31
9406instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9407  match(Set dst (RShiftL dst cnt));
9408  effect(KILL cr);
9409  ins_cost(200);
9410  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9411            "SAR    $dst.hi,$cnt" %}
9412  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9413  ins_encode( move_long_small_shift(dst,cnt) );
9414  ins_pipe( ialu_reg_long );
9415%}
9416
9417// Shift Right Long by 32-63
9418instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9419  match(Set dst (RShiftL dst cnt));
9420  effect(KILL cr);
9421  ins_cost(300);
9422  format %{ "MOV    $dst.lo,$dst.hi\n"
9423          "\tSAR    $dst.lo,$cnt-32\n"
9424          "\tSAR    $dst.hi,31" %}
9425  opcode(0xC1, 0x7);  /* C1 /7 ib */
9426  ins_encode( move_long_big_shift_sign(dst,cnt) );
9427  ins_pipe( ialu_reg_long );
9428%}
9429
9430// Shift Right arithmetic Long by variable
9431instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9432  match(Set dst (RShiftL dst shift));
9433  effect(KILL cr);
9434  ins_cost(600);
9435  size(18);
9436  format %{ "TEST   $shift,32\n\t"
9437            "JEQ,s  small\n\t"
9438            "MOV    $dst.lo,$dst.hi\n\t"
9439            "SAR    $dst.hi,31\n"
9440    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9441            "SAR    $dst.hi,$shift" %}
9442  ins_encode( shift_right_arith_long( dst, shift ) );
9443  ins_pipe( pipe_slow );
9444%}
9445
9446
9447//----------Double Instructions------------------------------------------------
9448// Double Math
9449
9450// Compare & branch
9451
9452// P6 version of float compare, sets condition codes in EFLAGS
9453instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9454  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9455  match(Set cr (CmpD src1 src2));
9456  effect(KILL rax);
9457  ins_cost(150);
9458  format %{ "FLD    $src1\n\t"
9459            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9460            "JNP    exit\n\t"
9461            "MOV    ah,1       // saw a NaN, set CF\n\t"
9462            "SAHF\n"
9463     "exit:\tNOP               // avoid branch to branch" %}
9464  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9465  ins_encode( Push_Reg_DPR(src1),
9466              OpcP, RegOpc(src2),
9467              cmpF_P6_fixup );
9468  ins_pipe( pipe_slow );
9469%}
9470
9471instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9472  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9473  match(Set cr (CmpD src1 src2));
9474  ins_cost(150);
9475  format %{ "FLD    $src1\n\t"
9476            "FUCOMIP ST,$src2  // P6 instruction" %}
9477  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9478  ins_encode( Push_Reg_DPR(src1),
9479              OpcP, RegOpc(src2));
9480  ins_pipe( pipe_slow );
9481%}
9482
9483// Compare & branch
9484instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9485  predicate(UseSSE<=1);
9486  match(Set cr (CmpD src1 src2));
9487  effect(KILL rax);
9488  ins_cost(200);
9489  format %{ "FLD    $src1\n\t"
9490            "FCOMp  $src2\n\t"
9491            "FNSTSW AX\n\t"
9492            "TEST   AX,0x400\n\t"
9493            "JZ,s   flags\n\t"
9494            "MOV    AH,1\t# unordered treat as LT\n"
9495    "flags:\tSAHF" %}
9496  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9497  ins_encode( Push_Reg_DPR(src1),
9498              OpcP, RegOpc(src2),
9499              fpu_flags);
9500  ins_pipe( pipe_slow );
9501%}
9502
9503// Compare vs zero into -1,0,1
9504instruct cmpDPR_0(eRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9505  predicate(UseSSE<=1);
9506  match(Set dst (CmpD3 src1 zero));
9507  effect(KILL cr, KILL rax);
9508  ins_cost(280);
9509  format %{ "FTSTD  $dst,$src1" %}
9510  opcode(0xE4, 0xD9);
9511  ins_encode( Push_Reg_DPR(src1),
9512              OpcS, OpcP, PopFPU,
9513              CmpF_Result(dst));
9514  ins_pipe( pipe_slow );
9515%}
9516
9517// Compare into -1,0,1
9518instruct cmpDPR_reg(eRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9519  predicate(UseSSE<=1);
9520  match(Set dst (CmpD3 src1 src2));
9521  effect(KILL cr, KILL rax);
9522  ins_cost(300);
9523  format %{ "FCMPD  $dst,$src1,$src2" %}
9524  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9525  ins_encode( Push_Reg_DPR(src1),
9526              OpcP, RegOpc(src2),
9527              CmpF_Result(dst));
9528  ins_pipe( pipe_slow );
9529%}
9530
9531// float compare and set condition codes in EFLAGS by XMM regs
9532instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9533  predicate(UseSSE>=2);
9534  match(Set cr (CmpD src1 src2));
9535  ins_cost(145);
9536  format %{ "UCOMISD $src1,$src2\n\t"
9537            "JNP,s   exit\n\t"
9538            "PUSHF\t# saw NaN, set CF\n\t"
9539            "AND     [rsp], #0xffffff2b\n\t"
9540            "POPF\n"
9541    "exit:" %}
9542  ins_encode %{
9543    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9544    emit_cmpfp_fixup(_masm);
9545  %}
9546  ins_pipe( pipe_slow );
9547%}
9548
9549instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9550  predicate(UseSSE>=2);
9551  match(Set cr (CmpD src1 src2));
9552  ins_cost(100);
9553  format %{ "UCOMISD $src1,$src2" %}
9554  ins_encode %{
9555    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9556  %}
9557  ins_pipe( pipe_slow );
9558%}
9559
9560// float compare and set condition codes in EFLAGS by XMM regs
9561instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9562  predicate(UseSSE>=2);
9563  match(Set cr (CmpD src1 (LoadD src2)));
9564  ins_cost(145);
9565  format %{ "UCOMISD $src1,$src2\n\t"
9566            "JNP,s   exit\n\t"
9567            "PUSHF\t# saw NaN, set CF\n\t"
9568            "AND     [rsp], #0xffffff2b\n\t"
9569            "POPF\n"
9570    "exit:" %}
9571  ins_encode %{
9572    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9573    emit_cmpfp_fixup(_masm);
9574  %}
9575  ins_pipe( pipe_slow );
9576%}
9577
9578instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9579  predicate(UseSSE>=2);
9580  match(Set cr (CmpD src1 (LoadD src2)));
9581  ins_cost(100);
9582  format %{ "UCOMISD $src1,$src2" %}
9583  ins_encode %{
9584    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9585  %}
9586  ins_pipe( pipe_slow );
9587%}
9588
9589// Compare into -1,0,1 in XMM
9590instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9591  predicate(UseSSE>=2);
9592  match(Set dst (CmpD3 src1 src2));
9593  effect(KILL cr);
9594  ins_cost(255);
9595  format %{ "UCOMISD $src1, $src2\n\t"
9596            "MOV     $dst, #-1\n\t"
9597            "JP,s    done\n\t"
9598            "JB,s    done\n\t"
9599            "SETNE   $dst\n\t"
9600            "MOVZB   $dst, $dst\n"
9601    "done:" %}
9602  ins_encode %{
9603    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9604    emit_cmpfp3(_masm, $dst$$Register);
9605  %}
9606  ins_pipe( pipe_slow );
9607%}
9608
9609// Compare into -1,0,1 in XMM and memory
9610instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9611  predicate(UseSSE>=2);
9612  match(Set dst (CmpD3 src1 (LoadD src2)));
9613  effect(KILL cr);
9614  ins_cost(275);
9615  format %{ "UCOMISD $src1, $src2\n\t"
9616            "MOV     $dst, #-1\n\t"
9617            "JP,s    done\n\t"
9618            "JB,s    done\n\t"
9619            "SETNE   $dst\n\t"
9620            "MOVZB   $dst, $dst\n"
9621    "done:" %}
9622  ins_encode %{
9623    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9624    emit_cmpfp3(_masm, $dst$$Register);
9625  %}
9626  ins_pipe( pipe_slow );
9627%}
9628
9629
9630instruct subDPR_reg(regDPR dst, regDPR src) %{
9631  predicate (UseSSE <=1);
9632  match(Set dst (SubD dst src));
9633
9634  format %{ "FLD    $src\n\t"
9635            "DSUBp  $dst,ST" %}
9636  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9637  ins_cost(150);
9638  ins_encode( Push_Reg_DPR(src),
9639              OpcP, RegOpc(dst) );
9640  ins_pipe( fpu_reg_reg );
9641%}
9642
9643instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9644  predicate (UseSSE <=1);
9645  match(Set dst (RoundDouble (SubD src1 src2)));
9646  ins_cost(250);
9647
9648  format %{ "FLD    $src2\n\t"
9649            "DSUB   ST,$src1\n\t"
9650            "FSTP_D $dst\t# D-round" %}
9651  opcode(0xD8, 0x5);
9652  ins_encode( Push_Reg_DPR(src2),
9653              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9654  ins_pipe( fpu_mem_reg_reg );
9655%}
9656
9657
9658instruct subDPR_reg_mem(regDPR dst, memory src) %{
9659  predicate (UseSSE <=1);
9660  match(Set dst (SubD dst (LoadD src)));
9661  ins_cost(150);
9662
9663  format %{ "FLD    $src\n\t"
9664            "DSUBp  $dst,ST" %}
9665  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9666  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9667              OpcP, RegOpc(dst) );
9668  ins_pipe( fpu_reg_mem );
9669%}
9670
9671instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9672  predicate (UseSSE<=1);
9673  match(Set dst (AbsD src));
9674  ins_cost(100);
9675  format %{ "FABS" %}
9676  opcode(0xE1, 0xD9);
9677  ins_encode( OpcS, OpcP );
9678  ins_pipe( fpu_reg_reg );
9679%}
9680
9681instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9682  predicate(UseSSE<=1);
9683  match(Set dst (NegD src));
9684  ins_cost(100);
9685  format %{ "FCHS" %}
9686  opcode(0xE0, 0xD9);
9687  ins_encode( OpcS, OpcP );
9688  ins_pipe( fpu_reg_reg );
9689%}
9690
9691instruct addDPR_reg(regDPR dst, regDPR src) %{
9692  predicate(UseSSE<=1);
9693  match(Set dst (AddD dst src));
9694  format %{ "FLD    $src\n\t"
9695            "DADD   $dst,ST" %}
9696  size(4);
9697  ins_cost(150);
9698  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9699  ins_encode( Push_Reg_DPR(src),
9700              OpcP, RegOpc(dst) );
9701  ins_pipe( fpu_reg_reg );
9702%}
9703
9704
9705instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9706  predicate(UseSSE<=1);
9707  match(Set dst (RoundDouble (AddD src1 src2)));
9708  ins_cost(250);
9709
9710  format %{ "FLD    $src2\n\t"
9711            "DADD   ST,$src1\n\t"
9712            "FSTP_D $dst\t# D-round" %}
9713  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9714  ins_encode( Push_Reg_DPR(src2),
9715              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9716  ins_pipe( fpu_mem_reg_reg );
9717%}
9718
9719
9720instruct addDPR_reg_mem(regDPR dst, memory src) %{
9721  predicate(UseSSE<=1);
9722  match(Set dst (AddD dst (LoadD src)));
9723  ins_cost(150);
9724
9725  format %{ "FLD    $src\n\t"
9726            "DADDp  $dst,ST" %}
9727  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9728  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9729              OpcP, RegOpc(dst) );
9730  ins_pipe( fpu_reg_mem );
9731%}
9732
9733// add-to-memory
9734instruct addDPR_mem_reg(memory dst, regDPR src) %{
9735  predicate(UseSSE<=1);
9736  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9737  ins_cost(150);
9738
9739  format %{ "FLD_D  $dst\n\t"
9740            "DADD   ST,$src\n\t"
9741            "FST_D  $dst" %}
9742  opcode(0xDD, 0x0);
9743  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9744              Opcode(0xD8), RegOpc(src),
9745              set_instruction_start,
9746              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9747  ins_pipe( fpu_reg_mem );
9748%}
9749
9750instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9751  predicate(UseSSE<=1);
9752  match(Set dst (AddD dst con));
9753  ins_cost(125);
9754  format %{ "FLD1\n\t"
9755            "DADDp  $dst,ST" %}
9756  ins_encode %{
9757    __ fld1();
9758    __ faddp($dst$$reg);
9759  %}
9760  ins_pipe(fpu_reg);
9761%}
9762
9763instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9764  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9765  match(Set dst (AddD dst con));
9766  ins_cost(200);
9767  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9768            "DADDp  $dst,ST" %}
9769  ins_encode %{
9770    __ fld_d($constantaddress($con));
9771    __ faddp($dst$$reg);
9772  %}
9773  ins_pipe(fpu_reg_mem);
9774%}
9775
9776instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9777  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9778  match(Set dst (RoundDouble (AddD src con)));
9779  ins_cost(200);
9780  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9781            "DADD   ST,$src\n\t"
9782            "FSTP_D $dst\t# D-round" %}
9783  ins_encode %{
9784    __ fld_d($constantaddress($con));
9785    __ fadd($src$$reg);
9786    __ fstp_d(Address(rsp, $dst$$disp));
9787  %}
9788  ins_pipe(fpu_mem_reg_con);
9789%}
9790
9791instruct mulDPR_reg(regDPR dst, regDPR src) %{
9792  predicate(UseSSE<=1);
9793  match(Set dst (MulD dst src));
9794  format %{ "FLD    $src\n\t"
9795            "DMULp  $dst,ST" %}
9796  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9797  ins_cost(150);
9798  ins_encode( Push_Reg_DPR(src),
9799              OpcP, RegOpc(dst) );
9800  ins_pipe( fpu_reg_reg );
9801%}
9802
9803// Strict FP instruction biases argument before multiply then
9804// biases result to avoid double rounding of subnormals.
9805//
9806// scale arg1 by multiplying arg1 by 2^(-15360)
9807// load arg2
9808// multiply scaled arg1 by arg2
9809// rescale product by 2^(15360)
9810//
9811instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9812  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9813  match(Set dst (MulD dst src));
9814  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9815
9816  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9817            "DMULp  $dst,ST\n\t"
9818            "FLD    $src\n\t"
9819            "DMULp  $dst,ST\n\t"
9820            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9821            "DMULp  $dst,ST\n\t" %}
9822  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9823  ins_encode( strictfp_bias1(dst),
9824              Push_Reg_DPR(src),
9825              OpcP, RegOpc(dst),
9826              strictfp_bias2(dst) );
9827  ins_pipe( fpu_reg_reg );
9828%}
9829
9830instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9831  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9832  match(Set dst (MulD dst con));
9833  ins_cost(200);
9834  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9835            "DMULp  $dst,ST" %}
9836  ins_encode %{
9837    __ fld_d($constantaddress($con));
9838    __ fmulp($dst$$reg);
9839  %}
9840  ins_pipe(fpu_reg_mem);
9841%}
9842
9843
9844instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9845  predicate( UseSSE<=1 );
9846  match(Set dst (MulD dst (LoadD src)));
9847  ins_cost(200);
9848  format %{ "FLD_D  $src\n\t"
9849            "DMULp  $dst,ST" %}
9850  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9851  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9852              OpcP, RegOpc(dst) );
9853  ins_pipe( fpu_reg_mem );
9854%}
9855
9856//
9857// Cisc-alternate to reg-reg multiply
9858instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9859  predicate( UseSSE<=1 );
9860  match(Set dst (MulD src (LoadD mem)));
9861  ins_cost(250);
9862  format %{ "FLD_D  $mem\n\t"
9863            "DMUL   ST,$src\n\t"
9864            "FSTP_D $dst" %}
9865  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9866  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9867              OpcReg_FPR(src),
9868              Pop_Reg_DPR(dst) );
9869  ins_pipe( fpu_reg_reg_mem );
9870%}
9871
9872
9873// MACRO3 -- addDPR a mulDPR
9874// This instruction is a '2-address' instruction in that the result goes
9875// back to src2.  This eliminates a move from the macro; possibly the
9876// register allocator will have to add it back (and maybe not).
9877instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9878  predicate( UseSSE<=1 );
9879  match(Set src2 (AddD (MulD src0 src1) src2));
9880  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9881            "DMUL   ST,$src1\n\t"
9882            "DADDp  $src2,ST" %}
9883  ins_cost(250);
9884  opcode(0xDD); /* LoadD DD /0 */
9885  ins_encode( Push_Reg_FPR(src0),
9886              FMul_ST_reg(src1),
9887              FAddP_reg_ST(src2) );
9888  ins_pipe( fpu_reg_reg_reg );
9889%}
9890
9891
9892// MACRO3 -- subDPR a mulDPR
9893instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9894  predicate( UseSSE<=1 );
9895  match(Set src2 (SubD (MulD src0 src1) src2));
9896  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9897            "DMUL   ST,$src1\n\t"
9898            "DSUBRp $src2,ST" %}
9899  ins_cost(250);
9900  ins_encode( Push_Reg_FPR(src0),
9901              FMul_ST_reg(src1),
9902              Opcode(0xDE), Opc_plus(0xE0,src2));
9903  ins_pipe( fpu_reg_reg_reg );
9904%}
9905
9906
9907instruct divDPR_reg(regDPR dst, regDPR src) %{
9908  predicate( UseSSE<=1 );
9909  match(Set dst (DivD dst src));
9910
9911  format %{ "FLD    $src\n\t"
9912            "FDIVp  $dst,ST" %}
9913  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9914  ins_cost(150);
9915  ins_encode( Push_Reg_DPR(src),
9916              OpcP, RegOpc(dst) );
9917  ins_pipe( fpu_reg_reg );
9918%}
9919
9920// Strict FP instruction biases argument before division then
9921// biases result, to avoid double rounding of subnormals.
9922//
9923// scale dividend by multiplying dividend by 2^(-15360)
9924// load divisor
9925// divide scaled dividend by divisor
9926// rescale quotient by 2^(15360)
9927//
9928instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9929  predicate (UseSSE<=1);
9930  match(Set dst (DivD dst src));
9931  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9932  ins_cost(01);
9933
9934  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9935            "DMULp  $dst,ST\n\t"
9936            "FLD    $src\n\t"
9937            "FDIVp  $dst,ST\n\t"
9938            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9939            "DMULp  $dst,ST\n\t" %}
9940  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9941  ins_encode( strictfp_bias1(dst),
9942              Push_Reg_DPR(src),
9943              OpcP, RegOpc(dst),
9944              strictfp_bias2(dst) );
9945  ins_pipe( fpu_reg_reg );
9946%}
9947
9948instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9949  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9950  match(Set dst (RoundDouble (DivD src1 src2)));
9951
9952  format %{ "FLD    $src1\n\t"
9953            "FDIV   ST,$src2\n\t"
9954            "FSTP_D $dst\t# D-round" %}
9955  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9956  ins_encode( Push_Reg_DPR(src1),
9957              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9958  ins_pipe( fpu_mem_reg_reg );
9959%}
9960
9961
9962instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9963  predicate(UseSSE<=1);
9964  match(Set dst (ModD dst src));
9965  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9966
9967  format %{ "DMOD   $dst,$src" %}
9968  ins_cost(250);
9969  ins_encode(Push_Reg_Mod_DPR(dst, src),
9970              emitModDPR(),
9971              Push_Result_Mod_DPR(src),
9972              Pop_Reg_DPR(dst));
9973  ins_pipe( pipe_slow );
9974%}
9975
9976instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9977  predicate(UseSSE>=2);
9978  match(Set dst (ModD src0 src1));
9979  effect(KILL rax, KILL cr);
9980
9981  format %{ "SUB    ESP,8\t # DMOD\n"
9982          "\tMOVSD  [ESP+0],$src1\n"
9983          "\tFLD_D  [ESP+0]\n"
9984          "\tMOVSD  [ESP+0],$src0\n"
9985          "\tFLD_D  [ESP+0]\n"
9986     "loop:\tFPREM\n"
9987          "\tFWAIT\n"
9988          "\tFNSTSW AX\n"
9989          "\tSAHF\n"
9990          "\tJP     loop\n"
9991          "\tFSTP_D [ESP+0]\n"
9992          "\tMOVSD  $dst,[ESP+0]\n"
9993          "\tADD    ESP,8\n"
9994          "\tFSTP   ST0\t # Restore FPU Stack"
9995    %}
9996  ins_cost(250);
9997  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9998  ins_pipe( pipe_slow );
9999%}
10000
10001instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
10002  predicate (UseSSE<=1);
10003  match(Set dst (SinD src));
10004  ins_cost(1800);
10005  format %{ "DSIN   $dst" %}
10006  opcode(0xD9, 0xFE);
10007  ins_encode( OpcP, OpcS );
10008  ins_pipe( pipe_slow );
10009%}
10010
10011instruct sinD_reg(regD dst, eFlagsReg cr) %{
10012  predicate (UseSSE>=2);
10013  match(Set dst (SinD dst));
10014  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10015  ins_cost(1800);
10016  format %{ "DSIN   $dst" %}
10017  opcode(0xD9, 0xFE);
10018  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10019  ins_pipe( pipe_slow );
10020%}
10021
10022instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
10023  predicate (UseSSE<=1);
10024  match(Set dst (CosD src));
10025  ins_cost(1800);
10026  format %{ "DCOS   $dst" %}
10027  opcode(0xD9, 0xFF);
10028  ins_encode( OpcP, OpcS );
10029  ins_pipe( pipe_slow );
10030%}
10031
10032instruct cosD_reg(regD dst, eFlagsReg cr) %{
10033  predicate (UseSSE>=2);
10034  match(Set dst (CosD dst));
10035  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10036  ins_cost(1800);
10037  format %{ "DCOS   $dst" %}
10038  opcode(0xD9, 0xFF);
10039  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
10040  ins_pipe( pipe_slow );
10041%}
10042
10043instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
10044  predicate (UseSSE<=1);
10045  match(Set dst(TanD src));
10046  format %{ "DTAN   $dst" %}
10047  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
10048              Opcode(0xDD), Opcode(0xD8));   // fstp st
10049  ins_pipe( pipe_slow );
10050%}
10051
10052instruct tanD_reg(regD dst, eFlagsReg cr) %{
10053  predicate (UseSSE>=2);
10054  match(Set dst(TanD dst));
10055  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10056  format %{ "DTAN   $dst" %}
10057  ins_encode( Push_SrcD(dst),
10058              Opcode(0xD9), Opcode(0xF2),    // fptan
10059              Opcode(0xDD), Opcode(0xD8),   // fstp st
10060              Push_ResultD(dst) );
10061  ins_pipe( pipe_slow );
10062%}
10063
10064instruct atanDPR_reg(regDPR dst, regDPR src) %{
10065  predicate (UseSSE<=1);
10066  match(Set dst(AtanD dst src));
10067  format %{ "DATA   $dst,$src" %}
10068  opcode(0xD9, 0xF3);
10069  ins_encode( Push_Reg_DPR(src),
10070              OpcP, OpcS, RegOpc(dst) );
10071  ins_pipe( pipe_slow );
10072%}
10073
10074instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
10075  predicate (UseSSE>=2);
10076  match(Set dst(AtanD dst src));
10077  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
10078  format %{ "DATA   $dst,$src" %}
10079  opcode(0xD9, 0xF3);
10080  ins_encode( Push_SrcD(src),
10081              OpcP, OpcS, Push_ResultD(dst) );
10082  ins_pipe( pipe_slow );
10083%}
10084
10085instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
10086  predicate (UseSSE<=1);
10087  match(Set dst (SqrtD src));
10088  format %{ "DSQRT  $dst,$src" %}
10089  opcode(0xFA, 0xD9);
10090  ins_encode( Push_Reg_DPR(src),
10091              OpcS, OpcP, Pop_Reg_DPR(dst) );
10092  ins_pipe( pipe_slow );
10093%}
10094
10095instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10096  predicate (UseSSE<=1);
10097  match(Set Y (PowD X Y));  // Raise X to the Yth power
10098  effect(KILL rax, KILL rbx, KILL rcx);
10099  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
10100            "FLD_D  $X\n\t"
10101            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
10102
10103            "FDUP   \t\t\t# Q Q\n\t"
10104            "FRNDINT\t\t\t# int(Q) Q\n\t"
10105            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10106            "FISTP  dword [ESP]\n\t"
10107            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10108            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10109            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10110            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10111            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10112            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10113            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10114            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10115            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10116            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10117            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10118            "MOV    [ESP+0],0\n\t"
10119            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10120
10121            "ADD    ESP,8"
10122             %}
10123  ins_encode( push_stack_temp_qword,
10124              Push_Reg_DPR(X),
10125              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10126              pow_exp_core_encoding,
10127              pop_stack_temp_qword);
10128  ins_pipe( pipe_slow );
10129%}
10130
10131instruct powD_reg(regD dst, regD src0, regD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
10132  predicate (UseSSE>=2);
10133  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
10134  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
10135  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
10136            "MOVSD  [ESP],$src1\n\t"
10137            "FLD    FPR1,$src1\n\t"
10138            "MOVSD  [ESP],$src0\n\t"
10139            "FLD    FPR1,$src0\n\t"
10140            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"
10141
10142            "FDUP   \t\t\t# Q Q\n\t"
10143            "FRNDINT\t\t\t# int(Q) Q\n\t"
10144            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10145            "FISTP  dword [ESP]\n\t"
10146            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10147            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10148            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10149            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10150            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10151            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10152            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10153            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10154            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10155            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10156            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10157            "MOV    [ESP+0],0\n\t"
10158            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10159
10160            "FST_D  [ESP]\n\t"
10161            "MOVSD  $dst,[ESP]\n\t"
10162            "ADD    ESP,8"
10163             %}
10164  ins_encode( push_stack_temp_qword,
10165              push_xmm_to_fpr1(src1),
10166              push_xmm_to_fpr1(src0),
10167              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10168              pow_exp_core_encoding,
10169              Push_ResultD(dst) );
10170  ins_pipe( pipe_slow );
10171%}
10172
10173
10174instruct expDPR_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10175  predicate (UseSSE<=1);
10176  match(Set dpr1 (ExpD dpr1));
10177  effect(KILL rax, KILL rbx, KILL rcx);
10178  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding"
10179            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10180            "FMULP  \t\t\t# Q=X*log2(e)\n\t"
10181
10182            "FDUP   \t\t\t# Q Q\n\t"
10183            "FRNDINT\t\t\t# int(Q) Q\n\t"
10184            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10185            "FISTP  dword [ESP]\n\t"
10186            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10187            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10188            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10189            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10190            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10191            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10192            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10193            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10194            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10195            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10196            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10197            "MOV    [ESP+0],0\n\t"
10198            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10199
10200            "ADD    ESP,8"
10201             %}
10202  ins_encode( push_stack_temp_qword,
10203              Opcode(0xD9), Opcode(0xEA),   // fldl2e
10204              Opcode(0xDE), Opcode(0xC9),   // fmulp
10205              pow_exp_core_encoding,
10206              pop_stack_temp_qword);
10207  ins_pipe( pipe_slow );
10208%}
10209
10210instruct expD_reg(regD dst, regD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10211  predicate (UseSSE>=2);
10212  match(Set dst (ExpD src));
10213  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
10214  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
10215            "MOVSD  [ESP],$src\n\t"
10216            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10217            "FMULP  \t\t\t# Q=X*log2(e) X\n\t"
10218
10219            "FDUP   \t\t\t# Q Q\n\t"
10220            "FRNDINT\t\t\t# int(Q) Q\n\t"
10221            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10222            "FISTP  dword [ESP]\n\t"
10223            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10224            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10225            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10226            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10227            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10228            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10229            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10230            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10231            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10232            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10233            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10234            "MOV    [ESP+0],0\n\t"
10235            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10236
10237            "FST_D  [ESP]\n\t"
10238            "MOVSD  $dst,[ESP]\n\t"
10239            "ADD    ESP,8"
10240             %}
10241  ins_encode( Push_SrcD(src),
10242              Opcode(0xD9), Opcode(0xEA),   // fldl2e
10243              Opcode(0xDE), Opcode(0xC9),   // fmulp
10244              pow_exp_core_encoding,
10245              Push_ResultD(dst) );
10246  ins_pipe( pipe_slow );
10247%}
10248
10249
10250
10251instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
10252  predicate (UseSSE<=1);
10253  // The source Double operand on FPU stack
10254  match(Set dst (Log10D src));
10255  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10256  // fxch         ; swap ST(0) with ST(1)
10257  // fyl2x        ; compute log_10(2) * log_2(x)
10258  format %{ "FLDLG2 \t\t\t#Log10\n\t"
10259            "FXCH   \n\t"
10260            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10261         %}
10262  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10263              Opcode(0xD9), Opcode(0xC9),   // fxch
10264              Opcode(0xD9), Opcode(0xF1));  // fyl2x
10265
10266  ins_pipe( pipe_slow );
10267%}
10268
10269instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
10270  predicate (UseSSE>=2);
10271  effect(KILL cr);
10272  match(Set dst (Log10D src));
10273  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
10274  // fyl2x        ; compute log_10(2) * log_2(x)
10275  format %{ "FLDLG2 \t\t\t#Log10\n\t"
10276            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
10277         %}
10278  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
10279              Push_SrcD(src),
10280              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10281              Push_ResultD(dst));
10282
10283  ins_pipe( pipe_slow );
10284%}
10285
10286instruct logDPR_reg(regDPR1 dst, regDPR1 src) %{
10287  predicate (UseSSE<=1);
10288  // The source Double operand on FPU stack
10289  match(Set dst (LogD src));
10290  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10291  // fxch         ; swap ST(0) with ST(1)
10292  // fyl2x        ; compute log_e(2) * log_2(x)
10293  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10294            "FXCH   \n\t"
10295            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10296         %}
10297  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10298              Opcode(0xD9), Opcode(0xC9),   // fxch
10299              Opcode(0xD9), Opcode(0xF1));  // fyl2x
10300
10301  ins_pipe( pipe_slow );
10302%}
10303
10304instruct logD_reg(regD dst, regD src, eFlagsReg cr) %{
10305  predicate (UseSSE>=2);
10306  effect(KILL cr);
10307  // The source and result Double operands in XMM registers
10308  match(Set dst (LogD src));
10309  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
10310  // fyl2x        ; compute log_e(2) * log_2(x)
10311  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
10312            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
10313         %}
10314  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
10315              Push_SrcD(src),
10316              Opcode(0xD9), Opcode(0xF1),   // fyl2x
10317              Push_ResultD(dst));
10318  ins_pipe( pipe_slow );
10319%}
10320
10321//-------------Float Instructions-------------------------------
10322// Float Math
10323
10324// Code for float compare:
10325//     fcompp();
10326//     fwait(); fnstsw_ax();
10327//     sahf();
10328//     movl(dst, unordered_result);
10329//     jcc(Assembler::parity, exit);
10330//     movl(dst, less_result);
10331//     jcc(Assembler::below, exit);
10332//     movl(dst, equal_result);
10333//     jcc(Assembler::equal, exit);
10334//     movl(dst, greater_result);
10335//   exit:
10336
10337// P6 version of float compare, sets condition codes in EFLAGS
10338instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10339  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10340  match(Set cr (CmpF src1 src2));
10341  effect(KILL rax);
10342  ins_cost(150);
10343  format %{ "FLD    $src1\n\t"
10344            "FUCOMIP ST,$src2  // P6 instruction\n\t"
10345            "JNP    exit\n\t"
10346            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
10347            "SAHF\n"
10348     "exit:\tNOP               // avoid branch to branch" %}
10349  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10350  ins_encode( Push_Reg_DPR(src1),
10351              OpcP, RegOpc(src2),
10352              cmpF_P6_fixup );
10353  ins_pipe( pipe_slow );
10354%}
10355
10356instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
10357  predicate(VM_Version::supports_cmov() && UseSSE == 0);
10358  match(Set cr (CmpF src1 src2));
10359  ins_cost(100);
10360  format %{ "FLD    $src1\n\t"
10361            "FUCOMIP ST,$src2  // P6 instruction" %}
10362  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
10363  ins_encode( Push_Reg_DPR(src1),
10364              OpcP, RegOpc(src2));
10365  ins_pipe( pipe_slow );
10366%}
10367
10368
10369// Compare & branch
10370instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10371  predicate(UseSSE == 0);
10372  match(Set cr (CmpF src1 src2));
10373  effect(KILL rax);
10374  ins_cost(200);
10375  format %{ "FLD    $src1\n\t"
10376            "FCOMp  $src2\n\t"
10377            "FNSTSW AX\n\t"
10378            "TEST   AX,0x400\n\t"
10379            "JZ,s   flags\n\t"
10380            "MOV    AH,1\t# unordered treat as LT\n"
10381    "flags:\tSAHF" %}
10382  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10383  ins_encode( Push_Reg_DPR(src1),
10384              OpcP, RegOpc(src2),
10385              fpu_flags);
10386  ins_pipe( pipe_slow );
10387%}
10388
10389// Compare vs zero into -1,0,1
10390instruct cmpFPR_0(eRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10391  predicate(UseSSE == 0);
10392  match(Set dst (CmpF3 src1 zero));
10393  effect(KILL cr, KILL rax);
10394  ins_cost(280);
10395  format %{ "FTSTF  $dst,$src1" %}
10396  opcode(0xE4, 0xD9);
10397  ins_encode( Push_Reg_DPR(src1),
10398              OpcS, OpcP, PopFPU,
10399              CmpF_Result(dst));
10400  ins_pipe( pipe_slow );
10401%}
10402
10403// Compare into -1,0,1
10404instruct cmpFPR_reg(eRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10405  predicate(UseSSE == 0);
10406  match(Set dst (CmpF3 src1 src2));
10407  effect(KILL cr, KILL rax);
10408  ins_cost(300);
10409  format %{ "FCMPF  $dst,$src1,$src2" %}
10410  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10411  ins_encode( Push_Reg_DPR(src1),
10412              OpcP, RegOpc(src2),
10413              CmpF_Result(dst));
10414  ins_pipe( pipe_slow );
10415%}
10416
10417// float compare and set condition codes in EFLAGS by XMM regs
10418instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10419  predicate(UseSSE>=1);
10420  match(Set cr (CmpF src1 src2));
10421  ins_cost(145);
10422  format %{ "UCOMISS $src1,$src2\n\t"
10423            "JNP,s   exit\n\t"
10424            "PUSHF\t# saw NaN, set CF\n\t"
10425            "AND     [rsp], #0xffffff2b\n\t"
10426            "POPF\n"
10427    "exit:" %}
10428  ins_encode %{
10429    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10430    emit_cmpfp_fixup(_masm);
10431  %}
10432  ins_pipe( pipe_slow );
10433%}
10434
10435instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10436  predicate(UseSSE>=1);
10437  match(Set cr (CmpF src1 src2));
10438  ins_cost(100);
10439  format %{ "UCOMISS $src1,$src2" %}
10440  ins_encode %{
10441    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10442  %}
10443  ins_pipe( pipe_slow );
10444%}
10445
10446// float compare and set condition codes in EFLAGS by XMM regs
10447instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10448  predicate(UseSSE>=1);
10449  match(Set cr (CmpF src1 (LoadF src2)));
10450  ins_cost(165);
10451  format %{ "UCOMISS $src1,$src2\n\t"
10452            "JNP,s   exit\n\t"
10453            "PUSHF\t# saw NaN, set CF\n\t"
10454            "AND     [rsp], #0xffffff2b\n\t"
10455            "POPF\n"
10456    "exit:" %}
10457  ins_encode %{
10458    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10459    emit_cmpfp_fixup(_masm);
10460  %}
10461  ins_pipe( pipe_slow );
10462%}
10463
10464instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10465  predicate(UseSSE>=1);
10466  match(Set cr (CmpF src1 (LoadF src2)));
10467  ins_cost(100);
10468  format %{ "UCOMISS $src1,$src2" %}
10469  ins_encode %{
10470    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10471  %}
10472  ins_pipe( pipe_slow );
10473%}
10474
10475// Compare into -1,0,1 in XMM
10476instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10477  predicate(UseSSE>=1);
10478  match(Set dst (CmpF3 src1 src2));
10479  effect(KILL cr);
10480  ins_cost(255);
10481  format %{ "UCOMISS $src1, $src2\n\t"
10482            "MOV     $dst, #-1\n\t"
10483            "JP,s    done\n\t"
10484            "JB,s    done\n\t"
10485            "SETNE   $dst\n\t"
10486            "MOVZB   $dst, $dst\n"
10487    "done:" %}
10488  ins_encode %{
10489    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10490    emit_cmpfp3(_masm, $dst$$Register);
10491  %}
10492  ins_pipe( pipe_slow );
10493%}
10494
10495// Compare into -1,0,1 in XMM and memory
10496instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10497  predicate(UseSSE>=1);
10498  match(Set dst (CmpF3 src1 (LoadF src2)));
10499  effect(KILL cr);
10500  ins_cost(275);
10501  format %{ "UCOMISS $src1, $src2\n\t"
10502            "MOV     $dst, #-1\n\t"
10503            "JP,s    done\n\t"
10504            "JB,s    done\n\t"
10505            "SETNE   $dst\n\t"
10506            "MOVZB   $dst, $dst\n"
10507    "done:" %}
10508  ins_encode %{
10509    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10510    emit_cmpfp3(_masm, $dst$$Register);
10511  %}
10512  ins_pipe( pipe_slow );
10513%}
10514
10515// Spill to obtain 24-bit precision
10516instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10517  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10518  match(Set dst (SubF src1 src2));
10519
10520  format %{ "FSUB   $dst,$src1 - $src2" %}
10521  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10522  ins_encode( Push_Reg_FPR(src1),
10523              OpcReg_FPR(src2),
10524              Pop_Mem_FPR(dst) );
10525  ins_pipe( fpu_mem_reg_reg );
10526%}
10527//
10528// This instruction does not round to 24-bits
10529instruct subFPR_reg(regFPR dst, regFPR src) %{
10530  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10531  match(Set dst (SubF dst src));
10532
10533  format %{ "FSUB   $dst,$src" %}
10534  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10535  ins_encode( Push_Reg_FPR(src),
10536              OpcP, RegOpc(dst) );
10537  ins_pipe( fpu_reg_reg );
10538%}
10539
10540// Spill to obtain 24-bit precision
10541instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10542  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10543  match(Set dst (AddF src1 src2));
10544
10545  format %{ "FADD   $dst,$src1,$src2" %}
10546  opcode(0xD8, 0x0); /* D8 C0+i */
10547  ins_encode( Push_Reg_FPR(src2),
10548              OpcReg_FPR(src1),
10549              Pop_Mem_FPR(dst) );
10550  ins_pipe( fpu_mem_reg_reg );
10551%}
10552//
10553// This instruction does not round to 24-bits
10554instruct addFPR_reg(regFPR dst, regFPR src) %{
10555  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10556  match(Set dst (AddF dst src));
10557
10558  format %{ "FLD    $src\n\t"
10559            "FADDp  $dst,ST" %}
10560  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10561  ins_encode( Push_Reg_FPR(src),
10562              OpcP, RegOpc(dst) );
10563  ins_pipe( fpu_reg_reg );
10564%}
10565
10566instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10567  predicate(UseSSE==0);
10568  match(Set dst (AbsF src));
10569  ins_cost(100);
10570  format %{ "FABS" %}
10571  opcode(0xE1, 0xD9);
10572  ins_encode( OpcS, OpcP );
10573  ins_pipe( fpu_reg_reg );
10574%}
10575
10576instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10577  predicate(UseSSE==0);
10578  match(Set dst (NegF src));
10579  ins_cost(100);
10580  format %{ "FCHS" %}
10581  opcode(0xE0, 0xD9);
10582  ins_encode( OpcS, OpcP );
10583  ins_pipe( fpu_reg_reg );
10584%}
10585
10586// Cisc-alternate to addFPR_reg
10587// Spill to obtain 24-bit precision
10588instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10589  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10590  match(Set dst (AddF src1 (LoadF src2)));
10591
10592  format %{ "FLD    $src2\n\t"
10593            "FADD   ST,$src1\n\t"
10594            "FSTP_S $dst" %}
10595  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10596  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10597              OpcReg_FPR(src1),
10598              Pop_Mem_FPR(dst) );
10599  ins_pipe( fpu_mem_reg_mem );
10600%}
10601//
10602// Cisc-alternate to addFPR_reg
10603// This instruction does not round to 24-bits
10604instruct addFPR_reg_mem(regFPR dst, memory src) %{
10605  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10606  match(Set dst (AddF dst (LoadF src)));
10607
10608  format %{ "FADD   $dst,$src" %}
10609  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10610  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10611              OpcP, RegOpc(dst) );
10612  ins_pipe( fpu_reg_mem );
10613%}
10614
10615// // Following two instructions for _222_mpegaudio
10616// Spill to obtain 24-bit precision
10617instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10618  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10619  match(Set dst (AddF src1 src2));
10620
10621  format %{ "FADD   $dst,$src1,$src2" %}
10622  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10623  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10624              OpcReg_FPR(src2),
10625              Pop_Mem_FPR(dst) );
10626  ins_pipe( fpu_mem_reg_mem );
10627%}
10628
10629// Cisc-spill variant
10630// Spill to obtain 24-bit precision
10631instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10632  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10633  match(Set dst (AddF src1 (LoadF src2)));
10634
10635  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10636  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10637  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10638              set_instruction_start,
10639              OpcP, RMopc_Mem(secondary,src1),
10640              Pop_Mem_FPR(dst) );
10641  ins_pipe( fpu_mem_mem_mem );
10642%}
10643
10644// Spill to obtain 24-bit precision
10645instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10646  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10647  match(Set dst (AddF src1 src2));
10648
10649  format %{ "FADD   $dst,$src1,$src2" %}
10650  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10651  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10652              set_instruction_start,
10653              OpcP, RMopc_Mem(secondary,src1),
10654              Pop_Mem_FPR(dst) );
10655  ins_pipe( fpu_mem_mem_mem );
10656%}
10657
10658
10659// Spill to obtain 24-bit precision
10660instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10661  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10662  match(Set dst (AddF src con));
10663  format %{ "FLD    $src\n\t"
10664            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10665            "FSTP_S $dst"  %}
10666  ins_encode %{
10667    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10668    __ fadd_s($constantaddress($con));
10669    __ fstp_s(Address(rsp, $dst$$disp));
10670  %}
10671  ins_pipe(fpu_mem_reg_con);
10672%}
10673//
10674// This instruction does not round to 24-bits
10675instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10676  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10677  match(Set dst (AddF src con));
10678  format %{ "FLD    $src\n\t"
10679            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10680            "FSTP   $dst"  %}
10681  ins_encode %{
10682    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10683    __ fadd_s($constantaddress($con));
10684    __ fstp_d($dst$$reg);
10685  %}
10686  ins_pipe(fpu_reg_reg_con);
10687%}
10688
10689// Spill to obtain 24-bit precision
10690instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10691  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10692  match(Set dst (MulF src1 src2));
10693
10694  format %{ "FLD    $src1\n\t"
10695            "FMUL   $src2\n\t"
10696            "FSTP_S $dst"  %}
10697  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10698  ins_encode( Push_Reg_FPR(src1),
10699              OpcReg_FPR(src2),
10700              Pop_Mem_FPR(dst) );
10701  ins_pipe( fpu_mem_reg_reg );
10702%}
10703//
10704// This instruction does not round to 24-bits
10705instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10706  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10707  match(Set dst (MulF src1 src2));
10708
10709  format %{ "FLD    $src1\n\t"
10710            "FMUL   $src2\n\t"
10711            "FSTP_S $dst"  %}
10712  opcode(0xD8, 0x1); /* D8 C8+i */
10713  ins_encode( Push_Reg_FPR(src2),
10714              OpcReg_FPR(src1),
10715              Pop_Reg_FPR(dst) );
10716  ins_pipe( fpu_reg_reg_reg );
10717%}
10718
10719
10720// Spill to obtain 24-bit precision
10721// Cisc-alternate to reg-reg multiply
10722instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10723  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10724  match(Set dst (MulF src1 (LoadF src2)));
10725
10726  format %{ "FLD_S  $src2\n\t"
10727            "FMUL   $src1\n\t"
10728            "FSTP_S $dst"  %}
10729  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10730  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10731              OpcReg_FPR(src1),
10732              Pop_Mem_FPR(dst) );
10733  ins_pipe( fpu_mem_reg_mem );
10734%}
10735//
10736// This instruction does not round to 24-bits
10737// Cisc-alternate to reg-reg multiply
10738instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10739  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10740  match(Set dst (MulF src1 (LoadF src2)));
10741
10742  format %{ "FMUL   $dst,$src1,$src2" %}
10743  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10744  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10745              OpcReg_FPR(src1),
10746              Pop_Reg_FPR(dst) );
10747  ins_pipe( fpu_reg_reg_mem );
10748%}
10749
10750// Spill to obtain 24-bit precision
10751instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10752  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10753  match(Set dst (MulF src1 src2));
10754
10755  format %{ "FMUL   $dst,$src1,$src2" %}
10756  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10757  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10758              set_instruction_start,
10759              OpcP, RMopc_Mem(secondary,src1),
10760              Pop_Mem_FPR(dst) );
10761  ins_pipe( fpu_mem_mem_mem );
10762%}
10763
10764// Spill to obtain 24-bit precision
10765instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10766  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10767  match(Set dst (MulF src con));
10768
10769  format %{ "FLD    $src\n\t"
10770            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10771            "FSTP_S $dst"  %}
10772  ins_encode %{
10773    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10774    __ fmul_s($constantaddress($con));
10775    __ fstp_s(Address(rsp, $dst$$disp));
10776  %}
10777  ins_pipe(fpu_mem_reg_con);
10778%}
10779//
10780// This instruction does not round to 24-bits
10781instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10782  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10783  match(Set dst (MulF src con));
10784
10785  format %{ "FLD    $src\n\t"
10786            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10787            "FSTP   $dst"  %}
10788  ins_encode %{
10789    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10790    __ fmul_s($constantaddress($con));
10791    __ fstp_d($dst$$reg);
10792  %}
10793  ins_pipe(fpu_reg_reg_con);
10794%}
10795
10796
10797//
10798// MACRO1 -- subsume unshared load into mulFPR
10799// This instruction does not round to 24-bits
10800instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10801  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10802  match(Set dst (MulF (LoadF mem1) src));
10803
10804  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10805            "FMUL   ST,$src\n\t"
10806            "FSTP   $dst" %}
10807  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10808  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10809              OpcReg_FPR(src),
10810              Pop_Reg_FPR(dst) );
10811  ins_pipe( fpu_reg_reg_mem );
10812%}
10813//
10814// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10815// This instruction does not round to 24-bits
10816instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10817  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10818  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10819  ins_cost(95);
10820
10821  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10822            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10823            "FADD   ST,$src2\n\t"
10824            "FSTP   $dst" %}
10825  opcode(0xD9); /* LoadF D9 /0 */
10826  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10827              FMul_ST_reg(src1),
10828              FAdd_ST_reg(src2),
10829              Pop_Reg_FPR(dst) );
10830  ins_pipe( fpu_reg_mem_reg_reg );
10831%}
10832
10833// MACRO3 -- addFPR a mulFPR
10834// This instruction does not round to 24-bits.  It is a '2-address'
10835// instruction in that the result goes back to src2.  This eliminates
10836// a move from the macro; possibly the register allocator will have
10837// to add it back (and maybe not).
10838instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10839  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10840  match(Set src2 (AddF (MulF src0 src1) src2));
10841
10842  format %{ "FLD    $src0     ===MACRO3===\n\t"
10843            "FMUL   ST,$src1\n\t"
10844            "FADDP  $src2,ST" %}
10845  opcode(0xD9); /* LoadF D9 /0 */
10846  ins_encode( Push_Reg_FPR(src0),
10847              FMul_ST_reg(src1),
10848              FAddP_reg_ST(src2) );
10849  ins_pipe( fpu_reg_reg_reg );
10850%}
10851
10852// MACRO4 -- divFPR subFPR
10853// This instruction does not round to 24-bits
10854instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10855  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10856  match(Set dst (DivF (SubF src2 src1) src3));
10857
10858  format %{ "FLD    $src2   ===MACRO4===\n\t"
10859            "FSUB   ST,$src1\n\t"
10860            "FDIV   ST,$src3\n\t"
10861            "FSTP  $dst" %}
10862  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10863  ins_encode( Push_Reg_FPR(src2),
10864              subFPR_divFPR_encode(src1,src3),
10865              Pop_Reg_FPR(dst) );
10866  ins_pipe( fpu_reg_reg_reg_reg );
10867%}
10868
10869// Spill to obtain 24-bit precision
10870instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10871  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10872  match(Set dst (DivF src1 src2));
10873
10874  format %{ "FDIV   $dst,$src1,$src2" %}
10875  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10876  ins_encode( Push_Reg_FPR(src1),
10877              OpcReg_FPR(src2),
10878              Pop_Mem_FPR(dst) );
10879  ins_pipe( fpu_mem_reg_reg );
10880%}
10881//
10882// This instruction does not round to 24-bits
10883instruct divFPR_reg(regFPR dst, regFPR src) %{
10884  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10885  match(Set dst (DivF dst src));
10886
10887  format %{ "FDIV   $dst,$src" %}
10888  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10889  ins_encode( Push_Reg_FPR(src),
10890              OpcP, RegOpc(dst) );
10891  ins_pipe( fpu_reg_reg );
10892%}
10893
10894
10895// Spill to obtain 24-bit precision
10896instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10897  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10898  match(Set dst (ModF src1 src2));
10899  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10900
10901  format %{ "FMOD   $dst,$src1,$src2" %}
10902  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10903              emitModDPR(),
10904              Push_Result_Mod_DPR(src2),
10905              Pop_Mem_FPR(dst));
10906  ins_pipe( pipe_slow );
10907%}
10908//
10909// This instruction does not round to 24-bits
10910instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10911  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10912  match(Set dst (ModF dst src));
10913  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10914
10915  format %{ "FMOD   $dst,$src" %}
10916  ins_encode(Push_Reg_Mod_DPR(dst, src),
10917              emitModDPR(),
10918              Push_Result_Mod_DPR(src),
10919              Pop_Reg_FPR(dst));
10920  ins_pipe( pipe_slow );
10921%}
10922
10923instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10924  predicate(UseSSE>=1);
10925  match(Set dst (ModF src0 src1));
10926  effect(KILL rax, KILL cr);
10927  format %{ "SUB    ESP,4\t # FMOD\n"
10928          "\tMOVSS  [ESP+0],$src1\n"
10929          "\tFLD_S  [ESP+0]\n"
10930          "\tMOVSS  [ESP+0],$src0\n"
10931          "\tFLD_S  [ESP+0]\n"
10932     "loop:\tFPREM\n"
10933          "\tFWAIT\n"
10934          "\tFNSTSW AX\n"
10935          "\tSAHF\n"
10936          "\tJP     loop\n"
10937          "\tFSTP_S [ESP+0]\n"
10938          "\tMOVSS  $dst,[ESP+0]\n"
10939          "\tADD    ESP,4\n"
10940          "\tFSTP   ST0\t # Restore FPU Stack"
10941    %}
10942  ins_cost(250);
10943  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10944  ins_pipe( pipe_slow );
10945%}
10946
10947
10948//----------Arithmetic Conversion Instructions---------------------------------
10949// The conversions operations are all Alpha sorted.  Please keep it that way!
10950
10951instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10952  predicate(UseSSE==0);
10953  match(Set dst (RoundFloat src));
10954  ins_cost(125);
10955  format %{ "FST_S  $dst,$src\t# F-round" %}
10956  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10957  ins_pipe( fpu_mem_reg );
10958%}
10959
10960instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10961  predicate(UseSSE<=1);
10962  match(Set dst (RoundDouble src));
10963  ins_cost(125);
10964  format %{ "FST_D  $dst,$src\t# D-round" %}
10965  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10966  ins_pipe( fpu_mem_reg );
10967%}
10968
10969// Force rounding to 24-bit precision and 6-bit exponent
10970instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10971  predicate(UseSSE==0);
10972  match(Set dst (ConvD2F src));
10973  format %{ "FST_S  $dst,$src\t# F-round" %}
10974  expand %{
10975    roundFloat_mem_reg(dst,src);
10976  %}
10977%}
10978
10979// Force rounding to 24-bit precision and 6-bit exponent
10980instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10981  predicate(UseSSE==1);
10982  match(Set dst (ConvD2F src));
10983  effect( KILL cr );
10984  format %{ "SUB    ESP,4\n\t"
10985            "FST_S  [ESP],$src\t# F-round\n\t"
10986            "MOVSS  $dst,[ESP]\n\t"
10987            "ADD ESP,4" %}
10988  ins_encode %{
10989    __ subptr(rsp, 4);
10990    if ($src$$reg != FPR1L_enc) {
10991      __ fld_s($src$$reg-1);
10992      __ fstp_s(Address(rsp, 0));
10993    } else {
10994      __ fst_s(Address(rsp, 0));
10995    }
10996    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10997    __ addptr(rsp, 4);
10998  %}
10999  ins_pipe( pipe_slow );
11000%}
11001
11002// Force rounding double precision to single precision
11003instruct convD2F_reg(regF dst, regD src) %{
11004  predicate(UseSSE>=2);
11005  match(Set dst (ConvD2F src));
11006  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
11007  ins_encode %{
11008    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
11009  %}
11010  ins_pipe( pipe_slow );
11011%}
11012
11013instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
11014  predicate(UseSSE==0);
11015  match(Set dst (ConvF2D src));
11016  format %{ "FST_S  $dst,$src\t# D-round" %}
11017  ins_encode( Pop_Reg_Reg_DPR(dst, src));
11018  ins_pipe( fpu_reg_reg );
11019%}
11020
11021instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
11022  predicate(UseSSE==1);
11023  match(Set dst (ConvF2D src));
11024  format %{ "FST_D  $dst,$src\t# D-round" %}
11025  expand %{
11026    roundDouble_mem_reg(dst,src);
11027  %}
11028%}
11029
11030instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
11031  predicate(UseSSE==1);
11032  match(Set dst (ConvF2D src));
11033  effect( KILL cr );
11034  format %{ "SUB    ESP,4\n\t"
11035            "MOVSS  [ESP] $src\n\t"
11036            "FLD_S  [ESP]\n\t"
11037            "ADD    ESP,4\n\t"
11038            "FSTP   $dst\t# D-round" %}
11039  ins_encode %{
11040    __ subptr(rsp, 4);
11041    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11042    __ fld_s(Address(rsp, 0));
11043    __ addptr(rsp, 4);
11044    __ fstp_d($dst$$reg);
11045  %}
11046  ins_pipe( pipe_slow );
11047%}
11048
11049instruct convF2D_reg(regD dst, regF src) %{
11050  predicate(UseSSE>=2);
11051  match(Set dst (ConvF2D src));
11052  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
11053  ins_encode %{
11054    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
11055  %}
11056  ins_pipe( pipe_slow );
11057%}
11058
11059// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11060instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
11061  predicate(UseSSE<=1);
11062  match(Set dst (ConvD2I src));
11063  effect( KILL tmp, KILL cr );
11064  format %{ "FLD    $src\t# Convert double to int \n\t"
11065            "FLDCW  trunc mode\n\t"
11066            "SUB    ESP,4\n\t"
11067            "FISTp  [ESP + #0]\n\t"
11068            "FLDCW  std/24-bit mode\n\t"
11069            "POP    EAX\n\t"
11070            "CMP    EAX,0x80000000\n\t"
11071            "JNE,s  fast\n\t"
11072            "FLD_D  $src\n\t"
11073            "CALL   d2i_wrapper\n"
11074      "fast:" %}
11075  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
11076  ins_pipe( pipe_slow );
11077%}
11078
11079// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
11080instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
11081  predicate(UseSSE>=2);
11082  match(Set dst (ConvD2I src));
11083  effect( KILL tmp, KILL cr );
11084  format %{ "CVTTSD2SI $dst, $src\n\t"
11085            "CMP    $dst,0x80000000\n\t"
11086            "JNE,s  fast\n\t"
11087            "SUB    ESP, 8\n\t"
11088            "MOVSD  [ESP], $src\n\t"
11089            "FLD_D  [ESP]\n\t"
11090            "ADD    ESP, 8\n\t"
11091            "CALL   d2i_wrapper\n"
11092      "fast:" %}
11093  ins_encode %{
11094    Label fast;
11095    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
11096    __ cmpl($dst$$Register, 0x80000000);
11097    __ jccb(Assembler::notEqual, fast);
11098    __ subptr(rsp, 8);
11099    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11100    __ fld_d(Address(rsp, 0));
11101    __ addptr(rsp, 8);
11102    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11103    __ bind(fast);
11104  %}
11105  ins_pipe( pipe_slow );
11106%}
11107
11108instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
11109  predicate(UseSSE<=1);
11110  match(Set dst (ConvD2L src));
11111  effect( KILL cr );
11112  format %{ "FLD    $src\t# Convert double to long\n\t"
11113            "FLDCW  trunc mode\n\t"
11114            "SUB    ESP,8\n\t"
11115            "FISTp  [ESP + #0]\n\t"
11116            "FLDCW  std/24-bit mode\n\t"
11117            "POP    EAX\n\t"
11118            "POP    EDX\n\t"
11119            "CMP    EDX,0x80000000\n\t"
11120            "JNE,s  fast\n\t"
11121            "TEST   EAX,EAX\n\t"
11122            "JNE,s  fast\n\t"
11123            "FLD    $src\n\t"
11124            "CALL   d2l_wrapper\n"
11125      "fast:" %}
11126  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
11127  ins_pipe( pipe_slow );
11128%}
11129
11130// XMM lacks a float/double->long conversion, so use the old FPU stack.
11131instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
11132  predicate (UseSSE>=2);
11133  match(Set dst (ConvD2L src));
11134  effect( KILL cr );
11135  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
11136            "MOVSD  [ESP],$src\n\t"
11137            "FLD_D  [ESP]\n\t"
11138            "FLDCW  trunc mode\n\t"
11139            "FISTp  [ESP + #0]\n\t"
11140            "FLDCW  std/24-bit mode\n\t"
11141            "POP    EAX\n\t"
11142            "POP    EDX\n\t"
11143            "CMP    EDX,0x80000000\n\t"
11144            "JNE,s  fast\n\t"
11145            "TEST   EAX,EAX\n\t"
11146            "JNE,s  fast\n\t"
11147            "SUB    ESP,8\n\t"
11148            "MOVSD  [ESP],$src\n\t"
11149            "FLD_D  [ESP]\n\t"
11150            "ADD    ESP,8\n\t"
11151            "CALL   d2l_wrapper\n"
11152      "fast:" %}
11153  ins_encode %{
11154    Label fast;
11155    __ subptr(rsp, 8);
11156    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11157    __ fld_d(Address(rsp, 0));
11158    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11159    __ fistp_d(Address(rsp, 0));
11160    // Restore the rounding mode, mask the exception
11161    if (Compile::current()->in_24_bit_fp_mode()) {
11162      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11163    } else {
11164      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11165    }
11166    // Load the converted long, adjust CPU stack
11167    __ pop(rax);
11168    __ pop(rdx);
11169    __ cmpl(rdx, 0x80000000);
11170    __ jccb(Assembler::notEqual, fast);
11171    __ testl(rax, rax);
11172    __ jccb(Assembler::notEqual, fast);
11173    __ subptr(rsp, 8);
11174    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
11175    __ fld_d(Address(rsp, 0));
11176    __ addptr(rsp, 8);
11177    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11178    __ bind(fast);
11179  %}
11180  ins_pipe( pipe_slow );
11181%}
11182
11183// Convert a double to an int.  Java semantics require we do complex
11184// manglations in the corner cases.  So we set the rounding mode to
11185// 'zero', store the darned double down as an int, and reset the
11186// rounding mode to 'nearest'.  The hardware stores a flag value down
11187// if we would overflow or converted a NAN; we check for this and
11188// and go the slow path if needed.
11189instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
11190  predicate(UseSSE==0);
11191  match(Set dst (ConvF2I src));
11192  effect( KILL tmp, KILL cr );
11193  format %{ "FLD    $src\t# Convert float to int \n\t"
11194            "FLDCW  trunc mode\n\t"
11195            "SUB    ESP,4\n\t"
11196            "FISTp  [ESP + #0]\n\t"
11197            "FLDCW  std/24-bit mode\n\t"
11198            "POP    EAX\n\t"
11199            "CMP    EAX,0x80000000\n\t"
11200            "JNE,s  fast\n\t"
11201            "FLD    $src\n\t"
11202            "CALL   d2i_wrapper\n"
11203      "fast:" %}
11204  // DPR2I_encoding works for FPR2I
11205  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
11206  ins_pipe( pipe_slow );
11207%}
11208
11209// Convert a float in xmm to an int reg.
11210instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
11211  predicate(UseSSE>=1);
11212  match(Set dst (ConvF2I src));
11213  effect( KILL tmp, KILL cr );
11214  format %{ "CVTTSS2SI $dst, $src\n\t"
11215            "CMP    $dst,0x80000000\n\t"
11216            "JNE,s  fast\n\t"
11217            "SUB    ESP, 4\n\t"
11218            "MOVSS  [ESP], $src\n\t"
11219            "FLD    [ESP]\n\t"
11220            "ADD    ESP, 4\n\t"
11221            "CALL   d2i_wrapper\n"
11222      "fast:" %}
11223  ins_encode %{
11224    Label fast;
11225    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
11226    __ cmpl($dst$$Register, 0x80000000);
11227    __ jccb(Assembler::notEqual, fast);
11228    __ subptr(rsp, 4);
11229    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11230    __ fld_s(Address(rsp, 0));
11231    __ addptr(rsp, 4);
11232    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
11233    __ bind(fast);
11234  %}
11235  ins_pipe( pipe_slow );
11236%}
11237
11238instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
11239  predicate(UseSSE==0);
11240  match(Set dst (ConvF2L src));
11241  effect( KILL cr );
11242  format %{ "FLD    $src\t# Convert float to long\n\t"
11243            "FLDCW  trunc mode\n\t"
11244            "SUB    ESP,8\n\t"
11245            "FISTp  [ESP + #0]\n\t"
11246            "FLDCW  std/24-bit mode\n\t"
11247            "POP    EAX\n\t"
11248            "POP    EDX\n\t"
11249            "CMP    EDX,0x80000000\n\t"
11250            "JNE,s  fast\n\t"
11251            "TEST   EAX,EAX\n\t"
11252            "JNE,s  fast\n\t"
11253            "FLD    $src\n\t"
11254            "CALL   d2l_wrapper\n"
11255      "fast:" %}
11256  // DPR2L_encoding works for FPR2L
11257  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
11258  ins_pipe( pipe_slow );
11259%}
11260
11261// XMM lacks a float/double->long conversion, so use the old FPU stack.
11262instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
11263  predicate (UseSSE>=1);
11264  match(Set dst (ConvF2L src));
11265  effect( KILL cr );
11266  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
11267            "MOVSS  [ESP],$src\n\t"
11268            "FLD_S  [ESP]\n\t"
11269            "FLDCW  trunc mode\n\t"
11270            "FISTp  [ESP + #0]\n\t"
11271            "FLDCW  std/24-bit mode\n\t"
11272            "POP    EAX\n\t"
11273            "POP    EDX\n\t"
11274            "CMP    EDX,0x80000000\n\t"
11275            "JNE,s  fast\n\t"
11276            "TEST   EAX,EAX\n\t"
11277            "JNE,s  fast\n\t"
11278            "SUB    ESP,4\t# Convert float to long\n\t"
11279            "MOVSS  [ESP],$src\n\t"
11280            "FLD_S  [ESP]\n\t"
11281            "ADD    ESP,4\n\t"
11282            "CALL   d2l_wrapper\n"
11283      "fast:" %}
11284  ins_encode %{
11285    Label fast;
11286    __ subptr(rsp, 8);
11287    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11288    __ fld_s(Address(rsp, 0));
11289    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
11290    __ fistp_d(Address(rsp, 0));
11291    // Restore the rounding mode, mask the exception
11292    if (Compile::current()->in_24_bit_fp_mode()) {
11293      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
11294    } else {
11295      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
11296    }
11297    // Load the converted long, adjust CPU stack
11298    __ pop(rax);
11299    __ pop(rdx);
11300    __ cmpl(rdx, 0x80000000);
11301    __ jccb(Assembler::notEqual, fast);
11302    __ testl(rax, rax);
11303    __ jccb(Assembler::notEqual, fast);
11304    __ subptr(rsp, 4);
11305    __ movflt(Address(rsp, 0), $src$$XMMRegister);
11306    __ fld_s(Address(rsp, 0));
11307    __ addptr(rsp, 4);
11308    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
11309    __ bind(fast);
11310  %}
11311  ins_pipe( pipe_slow );
11312%}
11313
11314instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
11315  predicate( UseSSE<=1 );
11316  match(Set dst (ConvI2D src));
11317  format %{ "FILD   $src\n\t"
11318            "FSTP   $dst" %}
11319  opcode(0xDB, 0x0);  /* DB /0 */
11320  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
11321  ins_pipe( fpu_reg_mem );
11322%}
11323
11324instruct convI2D_reg(regD dst, eRegI src) %{
11325  predicate( UseSSE>=2 && !UseXmmI2D );
11326  match(Set dst (ConvI2D src));
11327  format %{ "CVTSI2SD $dst,$src" %}
11328  ins_encode %{
11329    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
11330  %}
11331  ins_pipe( pipe_slow );
11332%}
11333
11334instruct convI2D_mem(regD dst, memory mem) %{
11335  predicate( UseSSE>=2 );
11336  match(Set dst (ConvI2D (LoadI mem)));
11337  format %{ "CVTSI2SD $dst,$mem" %}
11338  ins_encode %{
11339    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
11340  %}
11341  ins_pipe( pipe_slow );
11342%}
11343
11344instruct convXI2D_reg(regD dst, eRegI src)
11345%{
11346  predicate( UseSSE>=2 && UseXmmI2D );
11347  match(Set dst (ConvI2D src));
11348
11349  format %{ "MOVD  $dst,$src\n\t"
11350            "CVTDQ2PD $dst,$dst\t# i2d" %}
11351  ins_encode %{
11352    __ movdl($dst$$XMMRegister, $src$$Register);
11353    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11354  %}
11355  ins_pipe(pipe_slow); // XXX
11356%}
11357
11358instruct convI2DPR_mem(regDPR dst, memory mem) %{
11359  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11360  match(Set dst (ConvI2D (LoadI mem)));
11361  format %{ "FILD   $mem\n\t"
11362            "FSTP   $dst" %}
11363  opcode(0xDB);      /* DB /0 */
11364  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11365              Pop_Reg_DPR(dst));
11366  ins_pipe( fpu_reg_mem );
11367%}
11368
11369// Convert a byte to a float; no rounding step needed.
11370instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11371  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11372  match(Set dst (ConvI2F src));
11373  format %{ "FILD   $src\n\t"
11374            "FSTP   $dst" %}
11375
11376  opcode(0xDB, 0x0);  /* DB /0 */
11377  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11378  ins_pipe( fpu_reg_mem );
11379%}
11380
11381// In 24-bit mode, force exponent rounding by storing back out
11382instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11383  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11384  match(Set dst (ConvI2F src));
11385  ins_cost(200);
11386  format %{ "FILD   $src\n\t"
11387            "FSTP_S $dst" %}
11388  opcode(0xDB, 0x0);  /* DB /0 */
11389  ins_encode( Push_Mem_I(src),
11390              Pop_Mem_FPR(dst));
11391  ins_pipe( fpu_mem_mem );
11392%}
11393
11394// In 24-bit mode, force exponent rounding by storing back out
11395instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11396  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11397  match(Set dst (ConvI2F (LoadI mem)));
11398  ins_cost(200);
11399  format %{ "FILD   $mem\n\t"
11400            "FSTP_S $dst" %}
11401  opcode(0xDB);  /* DB /0 */
11402  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11403              Pop_Mem_FPR(dst));
11404  ins_pipe( fpu_mem_mem );
11405%}
11406
11407// This instruction does not round to 24-bits
11408instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11409  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11410  match(Set dst (ConvI2F src));
11411  format %{ "FILD   $src\n\t"
11412            "FSTP   $dst" %}
11413  opcode(0xDB, 0x0);  /* DB /0 */
11414  ins_encode( Push_Mem_I(src),
11415              Pop_Reg_FPR(dst));
11416  ins_pipe( fpu_reg_mem );
11417%}
11418
11419// This instruction does not round to 24-bits
11420instruct convI2FPR_mem(regFPR dst, memory mem) %{
11421  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11422  match(Set dst (ConvI2F (LoadI mem)));
11423  format %{ "FILD   $mem\n\t"
11424            "FSTP   $dst" %}
11425  opcode(0xDB);      /* DB /0 */
11426  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11427              Pop_Reg_FPR(dst));
11428  ins_pipe( fpu_reg_mem );
11429%}
11430
11431// Convert an int to a float in xmm; no rounding step needed.
11432instruct convI2F_reg(regF dst, eRegI src) %{
11433  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11434  match(Set dst (ConvI2F src));
11435  format %{ "CVTSI2SS $dst, $src" %}
11436  ins_encode %{
11437    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11438  %}
11439  ins_pipe( pipe_slow );
11440%}
11441
11442 instruct convXI2F_reg(regF dst, eRegI src)
11443%{
11444  predicate( UseSSE>=2 && UseXmmI2F );
11445  match(Set dst (ConvI2F src));
11446
11447  format %{ "MOVD  $dst,$src\n\t"
11448            "CVTDQ2PS $dst,$dst\t# i2f" %}
11449  ins_encode %{
11450    __ movdl($dst$$XMMRegister, $src$$Register);
11451    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11452  %}
11453  ins_pipe(pipe_slow); // XXX
11454%}
11455
11456instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
11457  match(Set dst (ConvI2L src));
11458  effect(KILL cr);
11459  ins_cost(375);
11460  format %{ "MOV    $dst.lo,$src\n\t"
11461            "MOV    $dst.hi,$src\n\t"
11462            "SAR    $dst.hi,31" %}
11463  ins_encode(convert_int_long(dst,src));
11464  ins_pipe( ialu_reg_reg_long );
11465%}
11466
11467// Zero-extend convert int to long
11468instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
11469  match(Set dst (AndL (ConvI2L src) mask) );
11470  effect( KILL flags );
11471  ins_cost(250);
11472  format %{ "MOV    $dst.lo,$src\n\t"
11473            "XOR    $dst.hi,$dst.hi" %}
11474  opcode(0x33); // XOR
11475  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11476  ins_pipe( ialu_reg_reg_long );
11477%}
11478
11479// Zero-extend long
11480instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11481  match(Set dst (AndL src mask) );
11482  effect( KILL flags );
11483  ins_cost(250);
11484  format %{ "MOV    $dst.lo,$src.lo\n\t"
11485            "XOR    $dst.hi,$dst.hi\n\t" %}
11486  opcode(0x33); // XOR
11487  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11488  ins_pipe( ialu_reg_reg_long );
11489%}
11490
11491instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11492  predicate (UseSSE<=1);
11493  match(Set dst (ConvL2D src));
11494  effect( KILL cr );
11495  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11496            "PUSH   $src.lo\n\t"
11497            "FILD   ST,[ESP + #0]\n\t"
11498            "ADD    ESP,8\n\t"
11499            "FSTP_D $dst\t# D-round" %}
11500  opcode(0xDF, 0x5);  /* DF /5 */
11501  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11502  ins_pipe( pipe_slow );
11503%}
11504
11505instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11506  predicate (UseSSE>=2);
11507  match(Set dst (ConvL2D src));
11508  effect( KILL cr );
11509  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11510            "PUSH   $src.lo\n\t"
11511            "FILD_D [ESP]\n\t"
11512            "FSTP_D [ESP]\n\t"
11513            "MOVSD  $dst,[ESP]\n\t"
11514            "ADD    ESP,8" %}
11515  opcode(0xDF, 0x5);  /* DF /5 */
11516  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11517  ins_pipe( pipe_slow );
11518%}
11519
11520instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11521  predicate (UseSSE>=1);
11522  match(Set dst (ConvL2F src));
11523  effect( KILL cr );
11524  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11525            "PUSH   $src.lo\n\t"
11526            "FILD_D [ESP]\n\t"
11527            "FSTP_S [ESP]\n\t"
11528            "MOVSS  $dst,[ESP]\n\t"
11529            "ADD    ESP,8" %}
11530  opcode(0xDF, 0x5);  /* DF /5 */
11531  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11532  ins_pipe( pipe_slow );
11533%}
11534
11535instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11536  match(Set dst (ConvL2F src));
11537  effect( KILL cr );
11538  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11539            "PUSH   $src.lo\n\t"
11540            "FILD   ST,[ESP + #0]\n\t"
11541            "ADD    ESP,8\n\t"
11542            "FSTP_S $dst\t# F-round" %}
11543  opcode(0xDF, 0x5);  /* DF /5 */
11544  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11545  ins_pipe( pipe_slow );
11546%}
11547
11548instruct convL2I_reg( eRegI dst, eRegL src ) %{
11549  match(Set dst (ConvL2I src));
11550  effect( DEF dst, USE src );
11551  format %{ "MOV    $dst,$src.lo" %}
11552  ins_encode(enc_CopyL_Lo(dst,src));
11553  ins_pipe( ialu_reg_reg );
11554%}
11555
11556
11557instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11558  match(Set dst (MoveF2I src));
11559  effect( DEF dst, USE src );
11560  ins_cost(100);
11561  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11562  ins_encode %{
11563    __ movl($dst$$Register, Address(rsp, $src$$disp));
11564  %}
11565  ins_pipe( ialu_reg_mem );
11566%}
11567
11568instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11569  predicate(UseSSE==0);
11570  match(Set dst (MoveF2I src));
11571  effect( DEF dst, USE src );
11572
11573  ins_cost(125);
11574  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11575  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11576  ins_pipe( fpu_mem_reg );
11577%}
11578
11579instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11580  predicate(UseSSE>=1);
11581  match(Set dst (MoveF2I src));
11582  effect( DEF dst, USE src );
11583
11584  ins_cost(95);
11585  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11586  ins_encode %{
11587    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11588  %}
11589  ins_pipe( pipe_slow );
11590%}
11591
11592instruct MoveF2I_reg_reg_sse(eRegI dst, regF src) %{
11593  predicate(UseSSE>=2);
11594  match(Set dst (MoveF2I src));
11595  effect( DEF dst, USE src );
11596  ins_cost(85);
11597  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11598  ins_encode %{
11599    __ movdl($dst$$Register, $src$$XMMRegister);
11600  %}
11601  ins_pipe( pipe_slow );
11602%}
11603
11604instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
11605  match(Set dst (MoveI2F src));
11606  effect( DEF dst, USE src );
11607
11608  ins_cost(100);
11609  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11610  ins_encode %{
11611    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11612  %}
11613  ins_pipe( ialu_mem_reg );
11614%}
11615
11616
11617instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11618  predicate(UseSSE==0);
11619  match(Set dst (MoveI2F src));
11620  effect(DEF dst, USE src);
11621
11622  ins_cost(125);
11623  format %{ "FLD_S  $src\n\t"
11624            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11625  opcode(0xD9);               /* D9 /0, FLD m32real */
11626  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11627              Pop_Reg_FPR(dst) );
11628  ins_pipe( fpu_reg_mem );
11629%}
11630
11631instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11632  predicate(UseSSE>=1);
11633  match(Set dst (MoveI2F src));
11634  effect( DEF dst, USE src );
11635
11636  ins_cost(95);
11637  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11638  ins_encode %{
11639    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11640  %}
11641  ins_pipe( pipe_slow );
11642%}
11643
11644instruct MoveI2F_reg_reg_sse(regF dst, eRegI src) %{
11645  predicate(UseSSE>=2);
11646  match(Set dst (MoveI2F src));
11647  effect( DEF dst, USE src );
11648
11649  ins_cost(85);
11650  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11651  ins_encode %{
11652    __ movdl($dst$$XMMRegister, $src$$Register);
11653  %}
11654  ins_pipe( pipe_slow );
11655%}
11656
11657instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11658  match(Set dst (MoveD2L src));
11659  effect(DEF dst, USE src);
11660
11661  ins_cost(250);
11662  format %{ "MOV    $dst.lo,$src\n\t"
11663            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11664  opcode(0x8B, 0x8B);
11665  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11666  ins_pipe( ialu_mem_long_reg );
11667%}
11668
11669instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11670  predicate(UseSSE<=1);
11671  match(Set dst (MoveD2L src));
11672  effect(DEF dst, USE src);
11673
11674  ins_cost(125);
11675  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11676  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11677  ins_pipe( fpu_mem_reg );
11678%}
11679
11680instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11681  predicate(UseSSE>=2);
11682  match(Set dst (MoveD2L src));
11683  effect(DEF dst, USE src);
11684  ins_cost(95);
11685  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11686  ins_encode %{
11687    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11688  %}
11689  ins_pipe( pipe_slow );
11690%}
11691
11692instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11693  predicate(UseSSE>=2);
11694  match(Set dst (MoveD2L src));
11695  effect(DEF dst, USE src, TEMP tmp);
11696  ins_cost(85);
11697  format %{ "MOVD   $dst.lo,$src\n\t"
11698            "PSHUFLW $tmp,$src,0x4E\n\t"
11699            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11700  ins_encode %{
11701    __ movdl($dst$$Register, $src$$XMMRegister);
11702    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11703    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11704  %}
11705  ins_pipe( pipe_slow );
11706%}
11707
11708instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11709  match(Set dst (MoveL2D src));
11710  effect(DEF dst, USE src);
11711
11712  ins_cost(200);
11713  format %{ "MOV    $dst,$src.lo\n\t"
11714            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11715  opcode(0x89, 0x89);
11716  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11717  ins_pipe( ialu_mem_long_reg );
11718%}
11719
11720
11721instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11722  predicate(UseSSE<=1);
11723  match(Set dst (MoveL2D src));
11724  effect(DEF dst, USE src);
11725  ins_cost(125);
11726
11727  format %{ "FLD_D  $src\n\t"
11728            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11729  opcode(0xDD);               /* DD /0, FLD m64real */
11730  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11731              Pop_Reg_DPR(dst) );
11732  ins_pipe( fpu_reg_mem );
11733%}
11734
11735
11736instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11737  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11738  match(Set dst (MoveL2D src));
11739  effect(DEF dst, USE src);
11740
11741  ins_cost(95);
11742  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11743  ins_encode %{
11744    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11745  %}
11746  ins_pipe( pipe_slow );
11747%}
11748
11749instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11750  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11751  match(Set dst (MoveL2D src));
11752  effect(DEF dst, USE src);
11753
11754  ins_cost(95);
11755  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11756  ins_encode %{
11757    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11758  %}
11759  ins_pipe( pipe_slow );
11760%}
11761
11762instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11763  predicate(UseSSE>=2);
11764  match(Set dst (MoveL2D src));
11765  effect(TEMP dst, USE src, TEMP tmp);
11766  ins_cost(85);
11767  format %{ "MOVD   $dst,$src.lo\n\t"
11768            "MOVD   $tmp,$src.hi\n\t"
11769            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11770  ins_encode %{
11771    __ movdl($dst$$XMMRegister, $src$$Register);
11772    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11773    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11774  %}
11775  ins_pipe( pipe_slow );
11776%}
11777
11778// Replicate scalar to packed byte (1 byte) values in xmm
11779instruct Repl8B_reg(regD dst, regD src) %{
11780  predicate(UseSSE>=2);
11781  match(Set dst (Replicate8B src));
11782  format %{ "MOVDQA  $dst,$src\n\t"
11783            "PUNPCKLBW $dst,$dst\n\t"
11784            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11785  ins_encode %{
11786    if ($dst$$reg != $src$$reg) {
11787      __ movdqa($dst$$XMMRegister, $src$$XMMRegister);
11788    }
11789    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11790    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11791  %}
11792  ins_pipe( pipe_slow );
11793%}
11794
11795// Replicate scalar to packed byte (1 byte) values in xmm
11796instruct Repl8B_eRegI(regD dst, eRegI src) %{
11797  predicate(UseSSE>=2);
11798  match(Set dst (Replicate8B src));
11799  format %{ "MOVD    $dst,$src\n\t"
11800            "PUNPCKLBW $dst,$dst\n\t"
11801            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
11802  ins_encode %{
11803    __ movdl($dst$$XMMRegister, $src$$Register);
11804    __ punpcklbw($dst$$XMMRegister, $dst$$XMMRegister);
11805    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11806  %}
11807  ins_pipe( pipe_slow );
11808%}
11809
11810// Replicate scalar zero to packed byte (1 byte) values in xmm
11811instruct Repl8B_immI0(regD dst, immI0 zero) %{
11812  predicate(UseSSE>=2);
11813  match(Set dst (Replicate8B zero));
11814  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
11815  ins_encode %{
11816    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11817  %}
11818  ins_pipe( fpu_reg_reg );
11819%}
11820
11821// Replicate scalar to packed shore (2 byte) values in xmm
11822instruct Repl4S_reg(regD dst, regD src) %{
11823  predicate(UseSSE>=2);
11824  match(Set dst (Replicate4S src));
11825  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
11826  ins_encode %{
11827    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11828  %}
11829  ins_pipe( fpu_reg_reg );
11830%}
11831
11832// Replicate scalar to packed shore (2 byte) values in xmm
11833instruct Repl4S_eRegI(regD dst, eRegI src) %{
11834  predicate(UseSSE>=2);
11835  match(Set dst (Replicate4S src));
11836  format %{ "MOVD    $dst,$src\n\t"
11837            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
11838  ins_encode %{
11839    __ movdl($dst$$XMMRegister, $src$$Register);
11840    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11841  %}
11842  ins_pipe( fpu_reg_reg );
11843%}
11844
11845// Replicate scalar zero to packed short (2 byte) values in xmm
11846instruct Repl4S_immI0(regD dst, immI0 zero) %{
11847  predicate(UseSSE>=2);
11848  match(Set dst (Replicate4S zero));
11849  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
11850  ins_encode %{
11851    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11852  %}
11853  ins_pipe( fpu_reg_reg );
11854%}
11855
11856// Replicate scalar to packed char (2 byte) values in xmm
11857instruct Repl4C_reg(regD dst, regD src) %{
11858  predicate(UseSSE>=2);
11859  match(Set dst (Replicate4C src));
11860  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
11861  ins_encode %{
11862    __ pshuflw($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11863  %}
11864  ins_pipe( fpu_reg_reg );
11865%}
11866
11867// Replicate scalar to packed char (2 byte) values in xmm
11868instruct Repl4C_eRegI(regD dst, eRegI src) %{
11869  predicate(UseSSE>=2);
11870  match(Set dst (Replicate4C src));
11871  format %{ "MOVD    $dst,$src\n\t"
11872            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
11873  ins_encode %{
11874    __ movdl($dst$$XMMRegister, $src$$Register);
11875    __ pshuflw($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11876  %}
11877  ins_pipe( fpu_reg_reg );
11878%}
11879
11880// Replicate scalar zero to packed char (2 byte) values in xmm
11881instruct Repl4C_immI0(regD dst, immI0 zero) %{
11882  predicate(UseSSE>=2);
11883  match(Set dst (Replicate4C zero));
11884  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
11885  ins_encode %{
11886    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11887  %}
11888  ins_pipe( fpu_reg_reg );
11889%}
11890
11891// Replicate scalar to packed integer (4 byte) values in xmm
11892instruct Repl2I_reg(regD dst, regD src) %{
11893  predicate(UseSSE>=2);
11894  match(Set dst (Replicate2I src));
11895  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
11896  ins_encode %{
11897    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0x00);
11898  %}
11899  ins_pipe( fpu_reg_reg );
11900%}
11901
11902// Replicate scalar to packed integer (4 byte) values in xmm
11903instruct Repl2I_eRegI(regD dst, eRegI src) %{
11904  predicate(UseSSE>=2);
11905  match(Set dst (Replicate2I src));
11906  format %{ "MOVD   $dst,$src\n\t"
11907            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
11908  ins_encode %{
11909    __ movdl($dst$$XMMRegister, $src$$Register);
11910    __ pshufd($dst$$XMMRegister, $dst$$XMMRegister, 0x00);
11911  %}
11912  ins_pipe( fpu_reg_reg );
11913%}
11914
11915// Replicate scalar zero to packed integer (2 byte) values in xmm
11916instruct Repl2I_immI0(regD dst, immI0 zero) %{
11917  predicate(UseSSE>=2);
11918  match(Set dst (Replicate2I zero));
11919  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
11920  ins_encode %{
11921    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11922  %}
11923  ins_pipe( fpu_reg_reg );
11924%}
11925
11926// Replicate scalar to packed single precision floating point values in xmm
11927instruct Repl2F_reg(regD dst, regD src) %{
11928  predicate(UseSSE>=2);
11929  match(Set dst (Replicate2F src));
11930  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11931  ins_encode %{
11932    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
11933  %}
11934  ins_pipe( fpu_reg_reg );
11935%}
11936
11937// Replicate scalar to packed single precision floating point values in xmm
11938instruct Repl2F_regF(regD dst, regF src) %{
11939  predicate(UseSSE>=2);
11940  match(Set dst (Replicate2F src));
11941  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
11942  ins_encode %{
11943    __ pshufd($dst$$XMMRegister, $src$$XMMRegister, 0xe0);
11944  %}
11945  ins_pipe( fpu_reg_reg );
11946%}
11947
11948// Replicate scalar to packed single precision floating point values in xmm
11949instruct Repl2F_immF0(regD dst, immF0 zero) %{
11950  predicate(UseSSE>=2);
11951  match(Set dst (Replicate2F zero));
11952  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
11953  ins_encode %{
11954    __ pxor($dst$$XMMRegister, $dst$$XMMRegister);
11955  %}
11956  ins_pipe( fpu_reg_reg );
11957%}
11958
11959// =======================================================================
11960// fast clearing of an array
11961instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11962  match(Set dummy (ClearArray cnt base));
11963  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11964  format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
11965            "XOR    EAX,EAX\n\t"
11966            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11967  opcode(0,0x4);
11968  ins_encode( Opcode(0xD1), RegOpc(ECX),
11969              OpcRegReg(0x33,EAX,EAX),
11970              Opcode(0xF3), Opcode(0xAB) );
11971  ins_pipe( pipe_slow );
11972%}
11973
11974instruct string_compare(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11975                        eAXRegI result, regD tmp1, eFlagsReg cr) %{
11976  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11977  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11978
11979  format %{ "String Compare $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11980  ins_encode %{
11981    __ string_compare($str1$$Register, $str2$$Register,
11982                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11983                      $tmp1$$XMMRegister);
11984  %}
11985  ins_pipe( pipe_slow );
11986%}
11987
11988// fast string equals
11989instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11990                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11991  match(Set result (StrEquals (Binary str1 str2) cnt));
11992  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11993
11994  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11995  ins_encode %{
11996    __ char_arrays_equals(false, $str1$$Register, $str2$$Register,
11997                          $cnt$$Register, $result$$Register, $tmp3$$Register,
11998                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11999  %}
12000  ins_pipe( pipe_slow );
12001%}
12002
12003// fast search of substring with known size.
12004instruct string_indexof_con(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
12005                            eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
12006  predicate(UseSSE42Intrinsics);
12007  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
12008  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
12009
12010  format %{ "String IndexOf $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
12011  ins_encode %{
12012    int icnt2 = (int)$int_cnt2$$constant;
12013    if (icnt2 >= 8) {
12014      // IndexOf for constant substrings with size >= 8 elements
12015      // which don't need to be loaded through stack.
12016      __ string_indexofC8($str1$$Register, $str2$$Register,
12017                          $cnt1$$Register, $cnt2$$Register,
12018                          icnt2, $result$$Register,
12019                          $vec$$XMMRegister, $tmp$$Register);
12020    } else {
12021      // Small strings are loaded through stack if they cross page boundary.
12022      __ string_indexof($str1$$Register, $str2$$Register,
12023                        $cnt1$$Register, $cnt2$$Register,
12024                        icnt2, $result$$Register,
12025                        $vec$$XMMRegister, $tmp$$Register);
12026    }
12027  %}
12028  ins_pipe( pipe_slow );
12029%}
12030
12031instruct string_indexof(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
12032                        eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
12033  predicate(UseSSE42Intrinsics);
12034  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
12035  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
12036
12037  format %{ "String IndexOf $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
12038  ins_encode %{
12039    __ string_indexof($str1$$Register, $str2$$Register,
12040                      $cnt1$$Register, $cnt2$$Register,
12041                      (-1), $result$$Register,
12042                      $vec$$XMMRegister, $tmp$$Register);
12043  %}
12044  ins_pipe( pipe_slow );
12045%}
12046
12047// fast array equals
12048instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
12049                      regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
12050%{
12051  match(Set result (AryEq ary1 ary2));
12052  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
12053  //ins_cost(300);
12054
12055  format %{ "Array Equals $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
12056  ins_encode %{
12057    __ char_arrays_equals(true, $ary1$$Register, $ary2$$Register,
12058                          $tmp3$$Register, $result$$Register, $tmp4$$Register,
12059                          $tmp1$$XMMRegister, $tmp2$$XMMRegister);
12060  %}
12061  ins_pipe( pipe_slow );
12062%}
12063
12064//----------Control Flow Instructions------------------------------------------
12065// Signed compare Instructions
12066instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
12067  match(Set cr (CmpI op1 op2));
12068  effect( DEF cr, USE op1, USE op2 );
12069  format %{ "CMP    $op1,$op2" %}
12070  opcode(0x3B);  /* Opcode 3B /r */
12071  ins_encode( OpcP, RegReg( op1, op2) );
12072  ins_pipe( ialu_cr_reg_reg );
12073%}
12074
12075instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
12076  match(Set cr (CmpI op1 op2));
12077  effect( DEF cr, USE op1 );
12078  format %{ "CMP    $op1,$op2" %}
12079  opcode(0x81,0x07);  /* Opcode 81 /7 */
12080  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
12081  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12082  ins_pipe( ialu_cr_reg_imm );
12083%}
12084
12085// Cisc-spilled version of cmpI_eReg
12086instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
12087  match(Set cr (CmpI op1 (LoadI op2)));
12088
12089  format %{ "CMP    $op1,$op2" %}
12090  ins_cost(500);
12091  opcode(0x3B);  /* Opcode 3B /r */
12092  ins_encode( OpcP, RegMem( op1, op2) );
12093  ins_pipe( ialu_cr_reg_mem );
12094%}
12095
12096instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
12097  match(Set cr (CmpI src zero));
12098  effect( DEF cr, USE src );
12099
12100  format %{ "TEST   $src,$src" %}
12101  opcode(0x85);
12102  ins_encode( OpcP, RegReg( src, src ) );
12103  ins_pipe( ialu_cr_reg_imm );
12104%}
12105
12106instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
12107  match(Set cr (CmpI (AndI src con) zero));
12108
12109  format %{ "TEST   $src,$con" %}
12110  opcode(0xF7,0x00);
12111  ins_encode( OpcP, RegOpc(src), Con32(con) );
12112  ins_pipe( ialu_cr_reg_imm );
12113%}
12114
12115instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
12116  match(Set cr (CmpI (AndI src mem) zero));
12117
12118  format %{ "TEST   $src,$mem" %}
12119  opcode(0x85);
12120  ins_encode( OpcP, RegMem( src, mem ) );
12121  ins_pipe( ialu_cr_reg_mem );
12122%}
12123
12124// Unsigned compare Instructions; really, same as signed except they
12125// produce an eFlagsRegU instead of eFlagsReg.
12126instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
12127  match(Set cr (CmpU op1 op2));
12128
12129  format %{ "CMPu   $op1,$op2" %}
12130  opcode(0x3B);  /* Opcode 3B /r */
12131  ins_encode( OpcP, RegReg( op1, op2) );
12132  ins_pipe( ialu_cr_reg_reg );
12133%}
12134
12135instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
12136  match(Set cr (CmpU op1 op2));
12137
12138  format %{ "CMPu   $op1,$op2" %}
12139  opcode(0x81,0x07);  /* Opcode 81 /7 */
12140  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12141  ins_pipe( ialu_cr_reg_imm );
12142%}
12143
12144// // Cisc-spilled version of cmpU_eReg
12145instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
12146  match(Set cr (CmpU op1 (LoadI op2)));
12147
12148  format %{ "CMPu   $op1,$op2" %}
12149  ins_cost(500);
12150  opcode(0x3B);  /* Opcode 3B /r */
12151  ins_encode( OpcP, RegMem( op1, op2) );
12152  ins_pipe( ialu_cr_reg_mem );
12153%}
12154
12155// // Cisc-spilled version of cmpU_eReg
12156//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
12157//  match(Set cr (CmpU (LoadI op1) op2));
12158//
12159//  format %{ "CMPu   $op1,$op2" %}
12160//  ins_cost(500);
12161//  opcode(0x39);  /* Opcode 39 /r */
12162//  ins_encode( OpcP, RegMem( op1, op2) );
12163//%}
12164
12165instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
12166  match(Set cr (CmpU src zero));
12167
12168  format %{ "TESTu  $src,$src" %}
12169  opcode(0x85);
12170  ins_encode( OpcP, RegReg( src, src ) );
12171  ins_pipe( ialu_cr_reg_imm );
12172%}
12173
12174// Unsigned pointer compare Instructions
12175instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
12176  match(Set cr (CmpP op1 op2));
12177
12178  format %{ "CMPu   $op1,$op2" %}
12179  opcode(0x3B);  /* Opcode 3B /r */
12180  ins_encode( OpcP, RegReg( op1, op2) );
12181  ins_pipe( ialu_cr_reg_reg );
12182%}
12183
12184instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
12185  match(Set cr (CmpP op1 op2));
12186
12187  format %{ "CMPu   $op1,$op2" %}
12188  opcode(0x81,0x07);  /* Opcode 81 /7 */
12189  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
12190  ins_pipe( ialu_cr_reg_imm );
12191%}
12192
12193// // Cisc-spilled version of cmpP_eReg
12194instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
12195  match(Set cr (CmpP op1 (LoadP op2)));
12196
12197  format %{ "CMPu   $op1,$op2" %}
12198  ins_cost(500);
12199  opcode(0x3B);  /* Opcode 3B /r */
12200  ins_encode( OpcP, RegMem( op1, op2) );
12201  ins_pipe( ialu_cr_reg_mem );
12202%}
12203
12204// // Cisc-spilled version of cmpP_eReg
12205//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
12206//  match(Set cr (CmpP (LoadP op1) op2));
12207//
12208//  format %{ "CMPu   $op1,$op2" %}
12209//  ins_cost(500);
12210//  opcode(0x39);  /* Opcode 39 /r */
12211//  ins_encode( OpcP, RegMem( op1, op2) );
12212//%}
12213
12214// Compare raw pointer (used in out-of-heap check).
12215// Only works because non-oop pointers must be raw pointers
12216// and raw pointers have no anti-dependencies.
12217instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
12218  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
12219  match(Set cr (CmpP op1 (LoadP op2)));
12220
12221  format %{ "CMPu   $op1,$op2" %}
12222  opcode(0x3B);  /* Opcode 3B /r */
12223  ins_encode( OpcP, RegMem( op1, op2) );
12224  ins_pipe( ialu_cr_reg_mem );
12225%}
12226
12227//
12228// This will generate a signed flags result. This should be ok
12229// since any compare to a zero should be eq/neq.
12230instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
12231  match(Set cr (CmpP src zero));
12232
12233  format %{ "TEST   $src,$src" %}
12234  opcode(0x85);
12235  ins_encode( OpcP, RegReg( src, src ) );
12236  ins_pipe( ialu_cr_reg_imm );
12237%}
12238
12239// Cisc-spilled version of testP_reg
12240// This will generate a signed flags result. This should be ok
12241// since any compare to a zero should be eq/neq.
12242instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
12243  match(Set cr (CmpP (LoadP op) zero));
12244
12245  format %{ "TEST   $op,0xFFFFFFFF" %}
12246  ins_cost(500);
12247  opcode(0xF7);               /* Opcode F7 /0 */
12248  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
12249  ins_pipe( ialu_cr_reg_imm );
12250%}
12251
12252// Yanked all unsigned pointer compare operations.
12253// Pointer compares are done with CmpP which is already unsigned.
12254
12255//----------Max and Min--------------------------------------------------------
12256// Min Instructions
12257////
12258//   *** Min and Max using the conditional move are slower than the
12259//   *** branch version on a Pentium III.
12260// // Conditional move for min
12261//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12262//  effect( USE_DEF op2, USE op1, USE cr );
12263//  format %{ "CMOVlt $op2,$op1\t! min" %}
12264//  opcode(0x4C,0x0F);
12265//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12266//  ins_pipe( pipe_cmov_reg );
12267//%}
12268//
12269//// Min Register with Register (P6 version)
12270//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
12271//  predicate(VM_Version::supports_cmov() );
12272//  match(Set op2 (MinI op1 op2));
12273//  ins_cost(200);
12274//  expand %{
12275//    eFlagsReg cr;
12276//    compI_eReg(cr,op1,op2);
12277//    cmovI_reg_lt(op2,op1,cr);
12278//  %}
12279//%}
12280
12281// Min Register with Register (generic version)
12282instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12283  match(Set dst (MinI dst src));
12284  effect(KILL flags);
12285  ins_cost(300);
12286
12287  format %{ "MIN    $dst,$src" %}
12288  opcode(0xCC);
12289  ins_encode( min_enc(dst,src) );
12290  ins_pipe( pipe_slow );
12291%}
12292
12293// Max Register with Register
12294//   *** Min and Max using the conditional move are slower than the
12295//   *** branch version on a Pentium III.
12296// // Conditional move for max
12297//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
12298//  effect( USE_DEF op2, USE op1, USE cr );
12299//  format %{ "CMOVgt $op2,$op1\t! max" %}
12300//  opcode(0x4F,0x0F);
12301//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
12302//  ins_pipe( pipe_cmov_reg );
12303//%}
12304//
12305// // Max Register with Register (P6 version)
12306//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
12307//  predicate(VM_Version::supports_cmov() );
12308//  match(Set op2 (MaxI op1 op2));
12309//  ins_cost(200);
12310//  expand %{
12311//    eFlagsReg cr;
12312//    compI_eReg(cr,op1,op2);
12313//    cmovI_reg_gt(op2,op1,cr);
12314//  %}
12315//%}
12316
12317// Max Register with Register (generic version)
12318instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
12319  match(Set dst (MaxI dst src));
12320  effect(KILL flags);
12321  ins_cost(300);
12322
12323  format %{ "MAX    $dst,$src" %}
12324  opcode(0xCC);
12325  ins_encode( max_enc(dst,src) );
12326  ins_pipe( pipe_slow );
12327%}
12328
12329// ============================================================================
12330// Counted Loop limit node which represents exact final iterator value.
12331// Note: the resulting value should fit into integer range since
12332// counted loops have limit check on overflow.
12333instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12334  match(Set limit (LoopLimit (Binary init limit) stride));
12335  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12336  ins_cost(300);
12337
12338  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12339  ins_encode %{
12340    int strd = (int)$stride$$constant;
12341    assert(strd != 1 && strd != -1, "sanity");
12342    int m1 = (strd > 0) ? 1 : -1;
12343    // Convert limit to long (EAX:EDX)
12344    __ cdql();
12345    // Convert init to long (init:tmp)
12346    __ movl($tmp$$Register, $init$$Register);
12347    __ sarl($tmp$$Register, 31);
12348    // $limit - $init
12349    __ subl($limit$$Register, $init$$Register);
12350    __ sbbl($limit_hi$$Register, $tmp$$Register);
12351    // + ($stride - 1)
12352    if (strd > 0) {
12353      __ addl($limit$$Register, (strd - 1));
12354      __ adcl($limit_hi$$Register, 0);
12355      __ movl($tmp$$Register, strd);
12356    } else {
12357      __ addl($limit$$Register, (strd + 1));
12358      __ adcl($limit_hi$$Register, -1);
12359      __ lneg($limit_hi$$Register, $limit$$Register);
12360      __ movl($tmp$$Register, -strd);
12361    }
12362    // signed devision: (EAX:EDX) / pos_stride
12363    __ idivl($tmp$$Register);
12364    if (strd < 0) {
12365      // restore sign
12366      __ negl($tmp$$Register);
12367    }
12368    // (EAX) * stride
12369    __ mull($tmp$$Register);
12370    // + init (ignore upper bits)
12371    __ addl($limit$$Register, $init$$Register);
12372  %}
12373  ins_pipe( pipe_slow );
12374%}
12375
12376// ============================================================================
12377// Branch Instructions
12378// Jump Table
12379instruct jumpXtnd(eRegI switch_val) %{
12380  match(Jump switch_val);
12381  ins_cost(350);
12382  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12383  ins_encode %{
12384    // Jump to Address(table_base + switch_reg)
12385    Address index(noreg, $switch_val$$Register, Address::times_1);
12386    __ jump(ArrayAddress($constantaddress, index));
12387  %}
12388  ins_pipe(pipe_jmp);
12389%}
12390
12391// Jump Direct - Label defines a relative address from JMP+1
12392instruct jmpDir(label labl) %{
12393  match(Goto);
12394  effect(USE labl);
12395
12396  ins_cost(300);
12397  format %{ "JMP    $labl" %}
12398  size(5);
12399  ins_encode %{
12400    Label* L = $labl$$label;
12401    __ jmp(*L, false); // Always long jump
12402  %}
12403  ins_pipe( pipe_jmp );
12404%}
12405
12406// Jump Direct Conditional - Label defines a relative address from Jcc+1
12407instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12408  match(If cop cr);
12409  effect(USE labl);
12410
12411  ins_cost(300);
12412  format %{ "J$cop    $labl" %}
12413  size(6);
12414  ins_encode %{
12415    Label* L = $labl$$label;
12416    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12417  %}
12418  ins_pipe( pipe_jcc );
12419%}
12420
12421// Jump Direct Conditional - Label defines a relative address from Jcc+1
12422instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12423  match(CountedLoopEnd cop cr);
12424  effect(USE labl);
12425
12426  ins_cost(300);
12427  format %{ "J$cop    $labl\t# Loop end" %}
12428  size(6);
12429  ins_encode %{
12430    Label* L = $labl$$label;
12431    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12432  %}
12433  ins_pipe( pipe_jcc );
12434%}
12435
12436// Jump Direct Conditional - Label defines a relative address from Jcc+1
12437instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12438  match(CountedLoopEnd cop cmp);
12439  effect(USE labl);
12440
12441  ins_cost(300);
12442  format %{ "J$cop,u  $labl\t# Loop end" %}
12443  size(6);
12444  ins_encode %{
12445    Label* L = $labl$$label;
12446    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12447  %}
12448  ins_pipe( pipe_jcc );
12449%}
12450
12451instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12452  match(CountedLoopEnd cop cmp);
12453  effect(USE labl);
12454
12455  ins_cost(200);
12456  format %{ "J$cop,u  $labl\t# Loop end" %}
12457  size(6);
12458  ins_encode %{
12459    Label* L = $labl$$label;
12460    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12461  %}
12462  ins_pipe( pipe_jcc );
12463%}
12464
12465// Jump Direct Conditional - using unsigned comparison
12466instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12467  match(If cop cmp);
12468  effect(USE labl);
12469
12470  ins_cost(300);
12471  format %{ "J$cop,u  $labl" %}
12472  size(6);
12473  ins_encode %{
12474    Label* L = $labl$$label;
12475    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12476  %}
12477  ins_pipe(pipe_jcc);
12478%}
12479
12480instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12481  match(If cop cmp);
12482  effect(USE labl);
12483
12484  ins_cost(200);
12485  format %{ "J$cop,u  $labl" %}
12486  size(6);
12487  ins_encode %{
12488    Label* L = $labl$$label;
12489    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12490  %}
12491  ins_pipe(pipe_jcc);
12492%}
12493
12494instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12495  match(If cop cmp);
12496  effect(USE labl);
12497
12498  ins_cost(200);
12499  format %{ $$template
12500    if ($cop$$cmpcode == Assembler::notEqual) {
12501      $$emit$$"JP,u   $labl\n\t"
12502      $$emit$$"J$cop,u   $labl"
12503    } else {
12504      $$emit$$"JP,u   done\n\t"
12505      $$emit$$"J$cop,u   $labl\n\t"
12506      $$emit$$"done:"
12507    }
12508  %}
12509  ins_encode %{
12510    Label* l = $labl$$label;
12511    if ($cop$$cmpcode == Assembler::notEqual) {
12512      __ jcc(Assembler::parity, *l, false);
12513      __ jcc(Assembler::notEqual, *l, false);
12514    } else if ($cop$$cmpcode == Assembler::equal) {
12515      Label done;
12516      __ jccb(Assembler::parity, done);
12517      __ jcc(Assembler::equal, *l, false);
12518      __ bind(done);
12519    } else {
12520       ShouldNotReachHere();
12521    }
12522  %}
12523  ins_pipe(pipe_jcc);
12524%}
12525
12526// ============================================================================
12527// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12528// array for an instance of the superklass.  Set a hidden internal cache on a
12529// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12530// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12531instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12532  match(Set result (PartialSubtypeCheck sub super));
12533  effect( KILL rcx, KILL cr );
12534
12535  ins_cost(1100);  // slightly larger than the next version
12536  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12537            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12538            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12539            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12540            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12541            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12542            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12543     "miss:\t" %}
12544
12545  opcode(0x1); // Force a XOR of EDI
12546  ins_encode( enc_PartialSubtypeCheck() );
12547  ins_pipe( pipe_slow );
12548%}
12549
12550instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12551  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12552  effect( KILL rcx, KILL result );
12553
12554  ins_cost(1000);
12555  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12556            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12557            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12558            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12559            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12560            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12561     "miss:\t" %}
12562
12563  opcode(0x0);  // No need to XOR EDI
12564  ins_encode( enc_PartialSubtypeCheck() );
12565  ins_pipe( pipe_slow );
12566%}
12567
12568// ============================================================================
12569// Branch Instructions -- short offset versions
12570//
12571// These instructions are used to replace jumps of a long offset (the default
12572// match) with jumps of a shorter offset.  These instructions are all tagged
12573// with the ins_short_branch attribute, which causes the ADLC to suppress the
12574// match rules in general matching.  Instead, the ADLC generates a conversion
12575// method in the MachNode which can be used to do in-place replacement of the
12576// long variant with the shorter variant.  The compiler will determine if a
12577// branch can be taken by the is_short_branch_offset() predicate in the machine
12578// specific code section of the file.
12579
12580// Jump Direct - Label defines a relative address from JMP+1
12581instruct jmpDir_short(label labl) %{
12582  match(Goto);
12583  effect(USE labl);
12584
12585  ins_cost(300);
12586  format %{ "JMP,s  $labl" %}
12587  size(2);
12588  ins_encode %{
12589    Label* L = $labl$$label;
12590    __ jmpb(*L);
12591  %}
12592  ins_pipe( pipe_jmp );
12593  ins_short_branch(1);
12594%}
12595
12596// Jump Direct Conditional - Label defines a relative address from Jcc+1
12597instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12598  match(If cop cr);
12599  effect(USE labl);
12600
12601  ins_cost(300);
12602  format %{ "J$cop,s  $labl" %}
12603  size(2);
12604  ins_encode %{
12605    Label* L = $labl$$label;
12606    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12607  %}
12608  ins_pipe( pipe_jcc );
12609  ins_short_branch(1);
12610%}
12611
12612// Jump Direct Conditional - Label defines a relative address from Jcc+1
12613instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12614  match(CountedLoopEnd cop cr);
12615  effect(USE labl);
12616
12617  ins_cost(300);
12618  format %{ "J$cop,s  $labl\t# Loop end" %}
12619  size(2);
12620  ins_encode %{
12621    Label* L = $labl$$label;
12622    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12623  %}
12624  ins_pipe( pipe_jcc );
12625  ins_short_branch(1);
12626%}
12627
12628// Jump Direct Conditional - Label defines a relative address from Jcc+1
12629instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12630  match(CountedLoopEnd cop cmp);
12631  effect(USE labl);
12632
12633  ins_cost(300);
12634  format %{ "J$cop,us $labl\t# Loop end" %}
12635  size(2);
12636  ins_encode %{
12637    Label* L = $labl$$label;
12638    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12639  %}
12640  ins_pipe( pipe_jcc );
12641  ins_short_branch(1);
12642%}
12643
12644instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12645  match(CountedLoopEnd cop cmp);
12646  effect(USE labl);
12647
12648  ins_cost(300);
12649  format %{ "J$cop,us $labl\t# Loop end" %}
12650  size(2);
12651  ins_encode %{
12652    Label* L = $labl$$label;
12653    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12654  %}
12655  ins_pipe( pipe_jcc );
12656  ins_short_branch(1);
12657%}
12658
12659// Jump Direct Conditional - using unsigned comparison
12660instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12661  match(If cop cmp);
12662  effect(USE labl);
12663
12664  ins_cost(300);
12665  format %{ "J$cop,us $labl" %}
12666  size(2);
12667  ins_encode %{
12668    Label* L = $labl$$label;
12669    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12670  %}
12671  ins_pipe( pipe_jcc );
12672  ins_short_branch(1);
12673%}
12674
12675instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12676  match(If cop cmp);
12677  effect(USE labl);
12678
12679  ins_cost(300);
12680  format %{ "J$cop,us $labl" %}
12681  size(2);
12682  ins_encode %{
12683    Label* L = $labl$$label;
12684    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12685  %}
12686  ins_pipe( pipe_jcc );
12687  ins_short_branch(1);
12688%}
12689
12690instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12691  match(If cop cmp);
12692  effect(USE labl);
12693
12694  ins_cost(300);
12695  format %{ $$template
12696    if ($cop$$cmpcode == Assembler::notEqual) {
12697      $$emit$$"JP,u,s   $labl\n\t"
12698      $$emit$$"J$cop,u,s   $labl"
12699    } else {
12700      $$emit$$"JP,u,s   done\n\t"
12701      $$emit$$"J$cop,u,s  $labl\n\t"
12702      $$emit$$"done:"
12703    }
12704  %}
12705  size(4);
12706  ins_encode %{
12707    Label* l = $labl$$label;
12708    if ($cop$$cmpcode == Assembler::notEqual) {
12709      __ jccb(Assembler::parity, *l);
12710      __ jccb(Assembler::notEqual, *l);
12711    } else if ($cop$$cmpcode == Assembler::equal) {
12712      Label done;
12713      __ jccb(Assembler::parity, done);
12714      __ jccb(Assembler::equal, *l);
12715      __ bind(done);
12716    } else {
12717       ShouldNotReachHere();
12718    }
12719  %}
12720  ins_pipe(pipe_jcc);
12721  ins_short_branch(1);
12722%}
12723
12724// ============================================================================
12725// Long Compare
12726//
12727// Currently we hold longs in 2 registers.  Comparing such values efficiently
12728// is tricky.  The flavor of compare used depends on whether we are testing
12729// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12730// The GE test is the negated LT test.  The LE test can be had by commuting
12731// the operands (yielding a GE test) and then negating; negate again for the
12732// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12733// NE test is negated from that.
12734
12735// Due to a shortcoming in the ADLC, it mixes up expressions like:
12736// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12737// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12738// are collapsed internally in the ADLC's dfa-gen code.  The match for
12739// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12740// foo match ends up with the wrong leaf.  One fix is to not match both
12741// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12742// both forms beat the trinary form of long-compare and both are very useful
12743// on Intel which has so few registers.
12744
12745// Manifest a CmpL result in an integer register.  Very painful.
12746// This is the test to avoid.
12747instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12748  match(Set dst (CmpL3 src1 src2));
12749  effect( KILL flags );
12750  ins_cost(1000);
12751  format %{ "XOR    $dst,$dst\n\t"
12752            "CMP    $src1.hi,$src2.hi\n\t"
12753            "JLT,s  m_one\n\t"
12754            "JGT,s  p_one\n\t"
12755            "CMP    $src1.lo,$src2.lo\n\t"
12756            "JB,s   m_one\n\t"
12757            "JEQ,s  done\n"
12758    "p_one:\tINC    $dst\n\t"
12759            "JMP,s  done\n"
12760    "m_one:\tDEC    $dst\n"
12761     "done:" %}
12762  ins_encode %{
12763    Label p_one, m_one, done;
12764    __ xorptr($dst$$Register, $dst$$Register);
12765    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12766    __ jccb(Assembler::less,    m_one);
12767    __ jccb(Assembler::greater, p_one);
12768    __ cmpl($src1$$Register, $src2$$Register);
12769    __ jccb(Assembler::below,   m_one);
12770    __ jccb(Assembler::equal,   done);
12771    __ bind(p_one);
12772    __ incrementl($dst$$Register);
12773    __ jmpb(done);
12774    __ bind(m_one);
12775    __ decrementl($dst$$Register);
12776    __ bind(done);
12777  %}
12778  ins_pipe( pipe_slow );
12779%}
12780
12781//======
12782// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12783// compares.  Can be used for LE or GT compares by reversing arguments.
12784// NOT GOOD FOR EQ/NE tests.
12785instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12786  match( Set flags (CmpL src zero ));
12787  ins_cost(100);
12788  format %{ "TEST   $src.hi,$src.hi" %}
12789  opcode(0x85);
12790  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12791  ins_pipe( ialu_cr_reg_reg );
12792%}
12793
12794// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12795// compares.  Can be used for LE or GT compares by reversing arguments.
12796// NOT GOOD FOR EQ/NE tests.
12797instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
12798  match( Set flags (CmpL src1 src2 ));
12799  effect( TEMP tmp );
12800  ins_cost(300);
12801  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12802            "MOV    $tmp,$src1.hi\n\t"
12803            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12804  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12805  ins_pipe( ialu_cr_reg_reg );
12806%}
12807
12808// Long compares reg < zero/req OR reg >= zero/req.
12809// Just a wrapper for a normal branch, plus the predicate test.
12810instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12811  match(If cmp flags);
12812  effect(USE labl);
12813  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12814  expand %{
12815    jmpCon(cmp,flags,labl);    // JLT or JGE...
12816  %}
12817%}
12818
12819// Compare 2 longs and CMOVE longs.
12820instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12821  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12822  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12823  ins_cost(400);
12824  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12825            "CMOV$cmp $dst.hi,$src.hi" %}
12826  opcode(0x0F,0x40);
12827  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12828  ins_pipe( pipe_cmov_reg_long );
12829%}
12830
12831instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12832  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12833  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12834  ins_cost(500);
12835  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12836            "CMOV$cmp $dst.hi,$src.hi" %}
12837  opcode(0x0F,0x40);
12838  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12839  ins_pipe( pipe_cmov_reg_long );
12840%}
12841
12842// Compare 2 longs and CMOVE ints.
12843instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
12844  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12845  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12846  ins_cost(200);
12847  format %{ "CMOV$cmp $dst,$src" %}
12848  opcode(0x0F,0x40);
12849  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12850  ins_pipe( pipe_cmov_reg );
12851%}
12852
12853instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
12854  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12855  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12856  ins_cost(250);
12857  format %{ "CMOV$cmp $dst,$src" %}
12858  opcode(0x0F,0x40);
12859  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12860  ins_pipe( pipe_cmov_mem );
12861%}
12862
12863// Compare 2 longs and CMOVE ints.
12864instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12865  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12866  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12867  ins_cost(200);
12868  format %{ "CMOV$cmp $dst,$src" %}
12869  opcode(0x0F,0x40);
12870  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12871  ins_pipe( pipe_cmov_reg );
12872%}
12873
12874// Compare 2 longs and CMOVE doubles
12875instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12876  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12877  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12878  ins_cost(200);
12879  expand %{
12880    fcmovDPR_regS(cmp,flags,dst,src);
12881  %}
12882%}
12883
12884// Compare 2 longs and CMOVE doubles
12885instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12886  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12887  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12888  ins_cost(200);
12889  expand %{
12890    fcmovD_regS(cmp,flags,dst,src);
12891  %}
12892%}
12893
12894instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12895  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12896  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12897  ins_cost(200);
12898  expand %{
12899    fcmovFPR_regS(cmp,flags,dst,src);
12900  %}
12901%}
12902
12903instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12904  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12905  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12906  ins_cost(200);
12907  expand %{
12908    fcmovF_regS(cmp,flags,dst,src);
12909  %}
12910%}
12911
12912//======
12913// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12914instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
12915  match( Set flags (CmpL src zero ));
12916  effect(TEMP tmp);
12917  ins_cost(200);
12918  format %{ "MOV    $tmp,$src.lo\n\t"
12919            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12920  ins_encode( long_cmp_flags0( src, tmp ) );
12921  ins_pipe( ialu_reg_reg_long );
12922%}
12923
12924// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12925instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12926  match( Set flags (CmpL src1 src2 ));
12927  ins_cost(200+300);
12928  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12929            "JNE,s  skip\n\t"
12930            "CMP    $src1.hi,$src2.hi\n\t"
12931     "skip:\t" %}
12932  ins_encode( long_cmp_flags1( src1, src2 ) );
12933  ins_pipe( ialu_cr_reg_reg );
12934%}
12935
12936// Long compare reg == zero/reg OR reg != zero/reg
12937// Just a wrapper for a normal branch, plus the predicate test.
12938instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12939  match(If cmp flags);
12940  effect(USE labl);
12941  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12942  expand %{
12943    jmpCon(cmp,flags,labl);    // JEQ or JNE...
12944  %}
12945%}
12946
12947// Compare 2 longs and CMOVE longs.
12948instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12949  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12950  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12951  ins_cost(400);
12952  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12953            "CMOV$cmp $dst.hi,$src.hi" %}
12954  opcode(0x0F,0x40);
12955  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12956  ins_pipe( pipe_cmov_reg_long );
12957%}
12958
12959instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12960  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12961  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12962  ins_cost(500);
12963  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12964            "CMOV$cmp $dst.hi,$src.hi" %}
12965  opcode(0x0F,0x40);
12966  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12967  ins_pipe( pipe_cmov_reg_long );
12968%}
12969
12970// Compare 2 longs and CMOVE ints.
12971instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
12972  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12973  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12974  ins_cost(200);
12975  format %{ "CMOV$cmp $dst,$src" %}
12976  opcode(0x0F,0x40);
12977  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12978  ins_pipe( pipe_cmov_reg );
12979%}
12980
12981instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
12982  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12983  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12984  ins_cost(250);
12985  format %{ "CMOV$cmp $dst,$src" %}
12986  opcode(0x0F,0x40);
12987  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12988  ins_pipe( pipe_cmov_mem );
12989%}
12990
12991// Compare 2 longs and CMOVE ints.
12992instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12993  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12994  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12995  ins_cost(200);
12996  format %{ "CMOV$cmp $dst,$src" %}
12997  opcode(0x0F,0x40);
12998  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12999  ins_pipe( pipe_cmov_reg );
13000%}
13001
13002// Compare 2 longs and CMOVE doubles
13003instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
13004  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13005  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13006  ins_cost(200);
13007  expand %{
13008    fcmovDPR_regS(cmp,flags,dst,src);
13009  %}
13010%}
13011
13012// Compare 2 longs and CMOVE doubles
13013instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
13014  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13015  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13016  ins_cost(200);
13017  expand %{
13018    fcmovD_regS(cmp,flags,dst,src);
13019  %}
13020%}
13021
13022instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
13023  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13024  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13025  ins_cost(200);
13026  expand %{
13027    fcmovFPR_regS(cmp,flags,dst,src);
13028  %}
13029%}
13030
13031instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
13032  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
13033  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13034  ins_cost(200);
13035  expand %{
13036    fcmovF_regS(cmp,flags,dst,src);
13037  %}
13038%}
13039
13040//======
13041// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13042// Same as cmpL_reg_flags_LEGT except must negate src
13043instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
13044  match( Set flags (CmpL src zero ));
13045  effect( TEMP tmp );
13046  ins_cost(300);
13047  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
13048            "CMP    $tmp,$src.lo\n\t"
13049            "SBB    $tmp,$src.hi\n\t" %}
13050  ins_encode( long_cmp_flags3(src, tmp) );
13051  ins_pipe( ialu_reg_reg_long );
13052%}
13053
13054// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
13055// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
13056// requires a commuted test to get the same result.
13057instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
13058  match( Set flags (CmpL src1 src2 ));
13059  effect( TEMP tmp );
13060  ins_cost(300);
13061  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
13062            "MOV    $tmp,$src2.hi\n\t"
13063            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
13064  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
13065  ins_pipe( ialu_cr_reg_reg );
13066%}
13067
13068// Long compares reg < zero/req OR reg >= zero/req.
13069// Just a wrapper for a normal branch, plus the predicate test
13070instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
13071  match(If cmp flags);
13072  effect(USE labl);
13073  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
13074  ins_cost(300);
13075  expand %{
13076    jmpCon(cmp,flags,labl);    // JGT or JLE...
13077  %}
13078%}
13079
13080// Compare 2 longs and CMOVE longs.
13081instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
13082  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
13083  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13084  ins_cost(400);
13085  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13086            "CMOV$cmp $dst.hi,$src.hi" %}
13087  opcode(0x0F,0x40);
13088  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
13089  ins_pipe( pipe_cmov_reg_long );
13090%}
13091
13092instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
13093  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
13094  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13095  ins_cost(500);
13096  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
13097            "CMOV$cmp $dst.hi,$src.hi+4" %}
13098  opcode(0x0F,0x40);
13099  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
13100  ins_pipe( pipe_cmov_reg_long );
13101%}
13102
13103// Compare 2 longs and CMOVE ints.
13104instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
13105  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13106  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
13107  ins_cost(200);
13108  format %{ "CMOV$cmp $dst,$src" %}
13109  opcode(0x0F,0x40);
13110  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13111  ins_pipe( pipe_cmov_reg );
13112%}
13113
13114instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
13115  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13116  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
13117  ins_cost(250);
13118  format %{ "CMOV$cmp $dst,$src" %}
13119  opcode(0x0F,0x40);
13120  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
13121  ins_pipe( pipe_cmov_mem );
13122%}
13123
13124// Compare 2 longs and CMOVE ptrs.
13125instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
13126  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
13127  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
13128  ins_cost(200);
13129  format %{ "CMOV$cmp $dst,$src" %}
13130  opcode(0x0F,0x40);
13131  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
13132  ins_pipe( pipe_cmov_reg );
13133%}
13134
13135// Compare 2 longs and CMOVE doubles
13136instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
13137  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13138  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13139  ins_cost(200);
13140  expand %{
13141    fcmovDPR_regS(cmp,flags,dst,src);
13142  %}
13143%}
13144
13145// Compare 2 longs and CMOVE doubles
13146instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
13147  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13148  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
13149  ins_cost(200);
13150  expand %{
13151    fcmovD_regS(cmp,flags,dst,src);
13152  %}
13153%}
13154
13155instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
13156  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13157  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13158  ins_cost(200);
13159  expand %{
13160    fcmovFPR_regS(cmp,flags,dst,src);
13161  %}
13162%}
13163
13164
13165instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
13166  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
13167  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
13168  ins_cost(200);
13169  expand %{
13170    fcmovF_regS(cmp,flags,dst,src);
13171  %}
13172%}
13173
13174
13175// ============================================================================
13176// Procedure Call/Return Instructions
13177// Call Java Static Instruction
13178// Note: If this code changes, the corresponding ret_addr_offset() and
13179//       compute_padding() functions will have to be adjusted.
13180instruct CallStaticJavaDirect(method meth) %{
13181  match(CallStaticJava);
13182  predicate(! ((CallStaticJavaNode*)n)->is_method_handle_invoke());
13183  effect(USE meth);
13184
13185  ins_cost(300);
13186  format %{ "CALL,static " %}
13187  opcode(0xE8); /* E8 cd */
13188  ins_encode( pre_call_FPU,
13189              Java_Static_Call( meth ),
13190              call_epilog,
13191              post_call_FPU );
13192  ins_pipe( pipe_slow );
13193  ins_alignment(4);
13194%}
13195
13196// Call Java Static Instruction (method handle version)
13197// Note: If this code changes, the corresponding ret_addr_offset() and
13198//       compute_padding() functions will have to be adjusted.
13199instruct CallStaticJavaHandle(method meth, eBPRegP ebp_mh_SP_save) %{
13200  match(CallStaticJava);
13201  predicate(((CallStaticJavaNode*)n)->is_method_handle_invoke());
13202  effect(USE meth);
13203  // EBP is saved by all callees (for interpreter stack correction).
13204  // We use it here for a similar purpose, in {preserve,restore}_SP.
13205
13206  ins_cost(300);
13207  format %{ "CALL,static/MethodHandle " %}
13208  opcode(0xE8); /* E8 cd */
13209  ins_encode( pre_call_FPU,
13210              preserve_SP,
13211              Java_Static_Call( meth ),
13212              restore_SP,
13213              call_epilog,
13214              post_call_FPU );
13215  ins_pipe( pipe_slow );
13216  ins_alignment(4);
13217%}
13218
13219// Call Java Dynamic Instruction
13220// Note: If this code changes, the corresponding ret_addr_offset() and
13221//       compute_padding() functions will have to be adjusted.
13222instruct CallDynamicJavaDirect(method meth) %{
13223  match(CallDynamicJava);
13224  effect(USE meth);
13225
13226  ins_cost(300);
13227  format %{ "MOV    EAX,(oop)-1\n\t"
13228            "CALL,dynamic" %}
13229  opcode(0xE8); /* E8 cd */
13230  ins_encode( pre_call_FPU,
13231              Java_Dynamic_Call( meth ),
13232              call_epilog,
13233              post_call_FPU );
13234  ins_pipe( pipe_slow );
13235  ins_alignment(4);
13236%}
13237
13238// Call Runtime Instruction
13239instruct CallRuntimeDirect(method meth) %{
13240  match(CallRuntime );
13241  effect(USE meth);
13242
13243  ins_cost(300);
13244  format %{ "CALL,runtime " %}
13245  opcode(0xE8); /* E8 cd */
13246  // Use FFREEs to clear entries in float stack
13247  ins_encode( pre_call_FPU,
13248              FFree_Float_Stack_All,
13249              Java_To_Runtime( meth ),
13250              post_call_FPU );
13251  ins_pipe( pipe_slow );
13252%}
13253
13254// Call runtime without safepoint
13255instruct CallLeafDirect(method meth) %{
13256  match(CallLeaf);
13257  effect(USE meth);
13258
13259  ins_cost(300);
13260  format %{ "CALL_LEAF,runtime " %}
13261  opcode(0xE8); /* E8 cd */
13262  ins_encode( pre_call_FPU,
13263              FFree_Float_Stack_All,
13264              Java_To_Runtime( meth ),
13265              Verify_FPU_For_Leaf, post_call_FPU );
13266  ins_pipe( pipe_slow );
13267%}
13268
13269instruct CallLeafNoFPDirect(method meth) %{
13270  match(CallLeafNoFP);
13271  effect(USE meth);
13272
13273  ins_cost(300);
13274  format %{ "CALL_LEAF_NOFP,runtime " %}
13275  opcode(0xE8); /* E8 cd */
13276  ins_encode(Java_To_Runtime(meth));
13277  ins_pipe( pipe_slow );
13278%}
13279
13280
13281// Return Instruction
13282// Remove the return address & jump to it.
13283instruct Ret() %{
13284  match(Return);
13285  format %{ "RET" %}
13286  opcode(0xC3);
13287  ins_encode(OpcP);
13288  ins_pipe( pipe_jmp );
13289%}
13290
13291// Tail Call; Jump from runtime stub to Java code.
13292// Also known as an 'interprocedural jump'.
13293// Target of jump will eventually return to caller.
13294// TailJump below removes the return address.
13295instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
13296  match(TailCall jump_target method_oop );
13297  ins_cost(300);
13298  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
13299  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13300  ins_encode( OpcP, RegOpc(jump_target) );
13301  ins_pipe( pipe_jmp );
13302%}
13303
13304
13305// Tail Jump; remove the return address; jump to target.
13306// TailCall above leaves the return address around.
13307instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
13308  match( TailJump jump_target ex_oop );
13309  ins_cost(300);
13310  format %{ "POP    EDX\t# pop return address into dummy\n\t"
13311            "JMP    $jump_target " %}
13312  opcode(0xFF, 0x4);  /* Opcode FF /4 */
13313  ins_encode( enc_pop_rdx,
13314              OpcP, RegOpc(jump_target) );
13315  ins_pipe( pipe_jmp );
13316%}
13317
13318// Create exception oop: created by stack-crawling runtime code.
13319// Created exception is now available to this handler, and is setup
13320// just prior to jumping to this handler.  No code emitted.
13321instruct CreateException( eAXRegP ex_oop )
13322%{
13323  match(Set ex_oop (CreateEx));
13324
13325  size(0);
13326  // use the following format syntax
13327  format %{ "# exception oop is in EAX; no code emitted" %}
13328  ins_encode();
13329  ins_pipe( empty );
13330%}
13331
13332
13333// Rethrow exception:
13334// The exception oop will come in the first argument position.
13335// Then JUMP (not call) to the rethrow stub code.
13336instruct RethrowException()
13337%{
13338  match(Rethrow);
13339
13340  // use the following format syntax
13341  format %{ "JMP    rethrow_stub" %}
13342  ins_encode(enc_rethrow);
13343  ins_pipe( pipe_jmp );
13344%}
13345
13346// inlined locking and unlocking
13347
13348
13349instruct cmpFastLock( eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13350  match( Set cr (FastLock object box) );
13351  effect( TEMP tmp, TEMP scr, USE_KILL box );
13352  ins_cost(300);
13353  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13354  ins_encode( Fast_Lock(object,box,tmp,scr) );
13355  ins_pipe( pipe_slow );
13356%}
13357
13358instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13359  match( Set cr (FastUnlock object box) );
13360  effect( TEMP tmp, USE_KILL box );
13361  ins_cost(300);
13362  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13363  ins_encode( Fast_Unlock(object,box,tmp) );
13364  ins_pipe( pipe_slow );
13365%}
13366
13367
13368
13369// ============================================================================
13370// Safepoint Instruction
13371instruct safePoint_poll(eFlagsReg cr) %{
13372  match(SafePoint);
13373  effect(KILL cr);
13374
13375  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13376  // On SPARC that might be acceptable as we can generate the address with
13377  // just a sethi, saving an or.  By polling at offset 0 we can end up
13378  // putting additional pressure on the index-0 in the D$.  Because of
13379  // alignment (just like the situation at hand) the lower indices tend
13380  // to see more traffic.  It'd be better to change the polling address
13381  // to offset 0 of the last $line in the polling page.
13382
13383  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13384  ins_cost(125);
13385  size(6) ;
13386  ins_encode( Safepoint_Poll() );
13387  ins_pipe( ialu_reg_mem );
13388%}
13389
13390
13391// ============================================================================
13392// This name is KNOWN by the ADLC and cannot be changed.
13393// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13394// for this guy.
13395instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13396  match(Set dst (ThreadLocal));
13397  effect(DEF dst, KILL cr);
13398
13399  format %{ "MOV    $dst, Thread::current()" %}
13400  ins_encode %{
13401    Register dstReg = as_Register($dst$$reg);
13402    __ get_thread(dstReg);
13403  %}
13404  ins_pipe( ialu_reg_fat );
13405%}
13406
13407
13408
13409//----------PEEPHOLE RULES-----------------------------------------------------
13410// These must follow all instruction definitions as they use the names
13411// defined in the instructions definitions.
13412//
13413// peepmatch ( root_instr_name [preceding_instruction]* );
13414//
13415// peepconstraint %{
13416// (instruction_number.operand_name relational_op instruction_number.operand_name
13417//  [, ...] );
13418// // instruction numbers are zero-based using left to right order in peepmatch
13419//
13420// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13421// // provide an instruction_number.operand_name for each operand that appears
13422// // in the replacement instruction's match rule
13423//
13424// ---------VM FLAGS---------------------------------------------------------
13425//
13426// All peephole optimizations can be turned off using -XX:-OptoPeephole
13427//
13428// Each peephole rule is given an identifying number starting with zero and
13429// increasing by one in the order seen by the parser.  An individual peephole
13430// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13431// on the command-line.
13432//
13433// ---------CURRENT LIMITATIONS----------------------------------------------
13434//
13435// Only match adjacent instructions in same basic block
13436// Only equality constraints
13437// Only constraints between operands, not (0.dest_reg == EAX_enc)
13438// Only one replacement instruction
13439//
13440// ---------EXAMPLE----------------------------------------------------------
13441//
13442// // pertinent parts of existing instructions in architecture description
13443// instruct movI(eRegI dst, eRegI src) %{
13444//   match(Set dst (CopyI src));
13445// %}
13446//
13447// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13448//   match(Set dst (AddI dst src));
13449//   effect(KILL cr);
13450// %}
13451//
13452// // Change (inc mov) to lea
13453// peephole %{
13454//   // increment preceeded by register-register move
13455//   peepmatch ( incI_eReg movI );
13456//   // require that the destination register of the increment
13457//   // match the destination register of the move
13458//   peepconstraint ( 0.dst == 1.dst );
13459//   // construct a replacement instruction that sets
13460//   // the destination to ( move's source register + one )
13461//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13462// %}
13463//
13464// Implementation no longer uses movX instructions since
13465// machine-independent system no longer uses CopyX nodes.
13466//
13467// peephole %{
13468//   peepmatch ( incI_eReg movI );
13469//   peepconstraint ( 0.dst == 1.dst );
13470//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13471// %}
13472//
13473// peephole %{
13474//   peepmatch ( decI_eReg movI );
13475//   peepconstraint ( 0.dst == 1.dst );
13476//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13477// %}
13478//
13479// peephole %{
13480//   peepmatch ( addI_eReg_imm movI );
13481//   peepconstraint ( 0.dst == 1.dst );
13482//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13483// %}
13484//
13485// peephole %{
13486//   peepmatch ( addP_eReg_imm movP );
13487//   peepconstraint ( 0.dst == 1.dst );
13488//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13489// %}
13490
13491// // Change load of spilled value to only a spill
13492// instruct storeI(memory mem, eRegI src) %{
13493//   match(Set mem (StoreI mem src));
13494// %}
13495//
13496// instruct loadI(eRegI dst, memory mem) %{
13497//   match(Set dst (LoadI mem));
13498// %}
13499//
13500peephole %{
13501  peepmatch ( loadI storeI );
13502  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13503  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13504%}
13505
13506//----------SMARTSPILL RULES---------------------------------------------------
13507// These must follow all instruction definitions as they use the names
13508// defined in the instructions definitions.
13509