x86_32.ad revision 605:98cb887364d3
1//
2// Copyright 1997-2008 Sun Microsystems, Inc.  All Rights Reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Sun Microsystems, Inc., 4150 Network Circle, Santa Clara,
20// CA 95054 USA or visit www.sun.com if you need additional information or
21// have any questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
// architecture.
31
register %{
//----------Architecture Description Register Definitions----------------------
// General Registers
// "reg_def"  name ( register save type, C convention save type,
//                   ideal register type, encoding );
// Register Save Types:
//
// NS  = No-Save:       The register allocator assumes that these registers
//                      can be used without saving upon entry to the method, &
//                      that they do not need to be saved at call sites.
//
// SOC = Save-On-Call:  The register allocator assumes that these registers
//                      can be used without saving upon entry to the method,
//                      but that they must be saved at call sites.
//
// SOE = Save-On-Entry: The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, but they do not need to be saved at call
//                      sites.
//
// AS  = Always-Save:   The register allocator assumes that these registers
//                      must be saved before using them upon entry to the
//                      method, & that they must be saved at call sites.
//
// Ideal Register Type is used to determine how to save & restore a
// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
//
// The encoding number is the actual bit-pattern placed into the opcodes.

// General Registers
// Previously set EBX, ESI, and EDI as save-on-entry for java code
// Turn off SOE in java-code due to frequent use of uncommon-traps.
// Now that allocator is better, turn on ESI and EDI as SOE registers.

reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());

// Special Registers
// Ideal register type 0: the flags register is never spilled/reloaded directly.
reg_def EFLAGS(SOC, SOC, 0, 8, VMRegImpl::Bad());

// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
// allocator, and only shows up in the encodings.
reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
// Ok, so here's the trick: FPR1 is really st(0) except in the midst
// of emission of assembly for a machnode. During the emission the fpu stack
// is pushed making FPR1 == st(1) temporarily. However at any safepoint
// the stack will not have this element so FPR1 == st(0) from the
// oopMap viewpoint. This same weirdness with numbering causes
// instruction encoding to have to play games with the register
// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
// where it does flt->flt moves to see an example
//
reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());

// XMM registers.  128-bit registers or 4 words each, labeled a-d.
// Word a in each register holds a Float, words ab hold a Double.
// We currently do not use the SIMD capabilities, so registers cd
// are unused at the moment.
reg_def XMM0a( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg());
reg_def XMM0b( SOC, SOC, Op_RegF, 0, xmm0->as_VMReg()->next());
reg_def XMM1a( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg());
reg_def XMM1b( SOC, SOC, Op_RegF, 1, xmm1->as_VMReg()->next());
reg_def XMM2a( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg());
reg_def XMM2b( SOC, SOC, Op_RegF, 2, xmm2->as_VMReg()->next());
reg_def XMM3a( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg());
reg_def XMM3b( SOC, SOC, Op_RegF, 3, xmm3->as_VMReg()->next());
reg_def XMM4a( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg());
reg_def XMM4b( SOC, SOC, Op_RegF, 4, xmm4->as_VMReg()->next());
reg_def XMM5a( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg());
reg_def XMM5b( SOC, SOC, Op_RegF, 5, xmm5->as_VMReg()->next());
reg_def XMM6a( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg());
reg_def XMM6b( SOC, SOC, Op_RegF, 6, xmm6->as_VMReg()->next());
reg_def XMM7a( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg());
reg_def XMM7b( SOC, SOC, Op_RegF, 7, xmm7->as_VMReg()->next());

// Specify priority of register selection within phases of register
// allocation.  Highest priority is first.  A useful heuristic is to
// give registers a low priority when they are required by machine
// instructions, like EAX and EDX.  Registers which are used as
// pairs must fall on an even boundary (witness the FPR#L's in this list).
// For the Intel integer registers, the equivalent Long pairs are
// EDX:EAX, EBX:ECX, and EDI:EBP.
alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
                    FPR6L, FPR6H, FPR7L, FPR7H );

alloc_class chunk1( XMM0a, XMM0b,
                    XMM1a, XMM1b,
                    XMM2a, XMM2b,
                    XMM3a, XMM3b,
                    XMM4a, XMM4b,
                    XMM5a, XMM5b,
                    XMM6a, XMM6b,
                    XMM7a, XMM7b, EFLAGS);


//----------Architecture Description Register Classes--------------------------
// Several register classes are automatically defined based upon information in
// this architecture description.
// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
// 3) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
// 4) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
//
// Class for all registers
reg_class any_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
// Class for general registers
reg_class e_reg(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
// Class for general registers which may be used for implicit null checks on win95
// Also safe for use by tailjump. We don't want to allocate in rbp.
reg_class e_reg_no_rbp(EAX, EDX, EDI, ESI, ECX, EBX);
// Class of "X" registers
reg_class x_reg(EBX, ECX, EDX, EAX);
// Class of registers that can appear in an address with no offset.
// EBP and ESP require an extra instruction byte for zero offset.
// Used in fast-unlock
reg_class p_reg(EDX, EDI, ESI, EBX);
// Class for general registers not including ECX
reg_class ncx_reg(EAX, EDX, EBP, EDI, ESI, EBX);
// Class for general registers not including EAX
reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
// Class for general registers not including EAX or EBX.
reg_class nabx_reg(EDX, EDI, ESI, ECX, EBP);
// Class of EAX (for multiply and divide operations)
reg_class eax_reg(EAX);
// Class of EBX (for atomic add)
reg_class ebx_reg(EBX);
// Class of ECX (for shift and JCXZ operations and cmpLTMask)
reg_class ecx_reg(ECX);
// Class of EDX (for multiply and divide operations)
reg_class edx_reg(EDX);
// Class of EDI (for synchronization)
reg_class edi_reg(EDI);
// Class of ESI (for synchronization)
reg_class esi_reg(ESI);
// Singleton class for interpreter's stack pointer
reg_class ebp_reg(EBP);
// Singleton class for stack pointer
reg_class sp_reg(ESP);
// Singleton class for instruction pointer
// reg_class ip_reg(EIP);
// Singleton class for condition codes
reg_class int_flags(EFLAGS);
// Class of integer register pairs
reg_class long_reg( EAX,EDX, ECX,EBX, EBP,EDI );
// Class of integer register pairs that aligns with calling convention
reg_class eadx_reg( EAX,EDX );
reg_class ebcx_reg( ECX,EBX );
// Not AX or DX, used in divides
reg_class nadx_reg( EBX,ECX,ESI,EDI,EBP );

// Floating point registers.  Notice FPR0 is not a choice.
// FPR0 is not ever allocated; we use clever encodings to fake
// 2-address instructions out of Intel's FP stack.
reg_class flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );

// make a register class for SSE registers
reg_class xmm_reg(XMM0a, XMM1a, XMM2a, XMM3a, XMM4a, XMM5a, XMM6a, XMM7a);

// make a double register class for SSE2 registers
reg_class xdb_reg(XMM0a,XMM0b, XMM1a,XMM1b, XMM2a,XMM2b, XMM3a,XMM3b,
                  XMM4a,XMM4b, XMM5a,XMM5b, XMM6a,XMM6b, XMM7a,XMM7b );

reg_class dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
                   FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
                   FPR7L,FPR7H );

reg_class flt_reg0( FPR1L );
reg_class dbl_reg0( FPR1L,FPR1H );
reg_class dbl_reg1( FPR2L,FPR2H );
reg_class dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
                       FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );

// XMM6 and XMM7 could be used as temporary registers for long, float and
// double values for SSE2.
reg_class xdb_reg6( XMM6a,XMM6b );
reg_class xdb_reg7( XMM7a,XMM7b );
%}
233
234
//----------SOURCE BLOCK-------------------------------------------------------
// This is a block of C++ code which provides values, functions, and
// definitions necessary in the rest of the architecture description
source %{
// Relocation formats used when emitting 32-bit immediates/displacements below.
#define   RELOC_IMM32    Assembler::imm_operand
#define   RELOC_DISP32   Assembler::disp32_operand

// Conventional HotSpot shorthand for a locally-constructed MacroAssembler.
#define __ _masm.

// How to find the high register of a Long pair, given the low register
// NOTE(review): assumes the two halves of a pair are 2 apart in the
// allocator's register numbering -- confirm against the reg_defs above.
#define   HIGH_FROM_LOW(x) ((x)+2)

// These masks are used to provide 128-bit aligned bitmasks to the XMM
// instructions, to allow sign-masking or sign-bit flipping.  They allow
// fast versions of NegF/NegD and AbsF/AbsD.
250
251// Note: 'double' and 'long long' have 32-bits alignment on x86.
252static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
253  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
254  // of 128-bits operands for SSE instructions.
255  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
256  // Store the value to a 128-bits operand.
257  operand[0] = lo;
258  operand[1] = hi;
259  return operand;
260}
261
// Buffer for 128-bits masks used by SSE instructions.
static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)

// Static initialization during VM startup.
// double_quadword rounds each pointer DOWN to a 16-byte boundary, so the
// extra fifth slot in the pool guarantees all four aligned masks still
// land inside the buffer.
static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
270
271// !!!!! Special hack to get all type of calls to specify the byte offset
272//       from the start of the call to the point where the return address
273//       will point.
274int MachCallStaticJavaNode::ret_addr_offset() {
275  return 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);  // 5 bytes from start of call to where return address points
276}
277
278int MachCallDynamicJavaNode::ret_addr_offset() {
279  return 10 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);  // 10 bytes from start of call to where return address points
280}
281
// Size in bytes of the FFree_Float_Stack_All sequence emitted before runtime
// calls; filled in when that sequence is first emitted (-1 until then).
static int sizeof_FFree_Float_Stack_All = -1;

int MachCallRuntimeNode::ret_addr_offset() {
  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
  // FFree sequence + 5-byte call, plus a 6-byte fldcw in 24-bit FPU mode.
  return sizeof_FFree_Float_Stack_All + 5 + (Compile::current()->in_24_bit_fp_mode() ? 6 : 0);
}
288
// Indicate if the safepoint node needs the polling page as an input.
// Since x86 does have absolute addressing, it doesn't: the poll can
// reference the polling page with an absolute 32-bit address.
bool SafePointNode::needs_polling_address_input() {
  return false;
}
294
295//
296// Compute padding required for nodes which need alignment
297//
298
299// The address of the call instruction needs to be 4-byte aligned to
300// ensure that it does not span a cache line so that it can be patched.
301int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
302  if (Compile::current()->in_24_bit_fp_mode())
303    current_offset += 6;    // skip fldcw in pre_call_FPU, if any
304  current_offset += 1;      // skip call opcode byte
305  return round_to(current_offset, alignment_required()) - current_offset;
306}
307
308// The address of the call instruction needs to be 4-byte aligned to
309// ensure that it does not span a cache line so that it can be patched.
310int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
311  if (Compile::current()->in_24_bit_fp_mode())
312    current_offset += 6;    // skip fldcw in pre_call_FPU, if any
313  current_offset += 5;      // skip MOV instruction
314  current_offset += 1;      // skip call opcode byte
315  return round_to(current_offset, alignment_required()) - current_offset;
316}
317
#ifndef PRODUCT
// Debug printing for the breakpoint node: it assembles to a single INT3.
void MachBreakpointNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("INT3");
}
#endif
323
324// EMIT_RM()
325void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
326  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
327  *(cbuf.code_end()) = c;
328  cbuf.set_code_end(cbuf.code_end() + 1);
329}
330
331// EMIT_CC()
332void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
333  unsigned char c = (unsigned char)( f1 | f2 );
334  *(cbuf.code_end()) = c;
335  cbuf.set_code_end(cbuf.code_end() + 1);
336}
337
// EMIT_OPCODE()
// Append a single opcode byte to the code buffer.
void emit_opcode(CodeBuffer &cbuf, int code) {
  *(cbuf.code_end()) = (unsigned char)code;
  cbuf.set_code_end(cbuf.code_end() + 1);
}
343
// EMIT_OPCODE() w/ relocation information
// Record a relocation at (inst_mark + offset), then emit the opcode byte.
void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
  cbuf.relocate(cbuf.inst_mark() + offset, reloc);
  emit_opcode(cbuf, code);
}
349
// EMIT_D8()
// Append an 8-bit immediate/displacement byte.
void emit_d8(CodeBuffer &cbuf, int d8) {
  *(cbuf.code_end()) = (unsigned char)d8;
  cbuf.set_code_end(cbuf.code_end() + 1);
}
355
// EMIT_D16()
// Append a 16-bit immediate/displacement (little-endian).  The possibly
// unaligned 'short' store is fine on x86.
void emit_d16(CodeBuffer &cbuf, int d16) {
  *((short *)(cbuf.code_end())) = d16;
  cbuf.set_code_end(cbuf.code_end() + 2);
}
361
// EMIT_D32()
// Append a 32-bit immediate/displacement (little-endian).  The possibly
// unaligned 'int' store is fine on x86.
void emit_d32(CodeBuffer &cbuf, int d32) {
  *((int *)(cbuf.code_end())) = d32;
  cbuf.set_code_end(cbuf.code_end() + 4);
}
367
// emit 32 bit value and construct relocation entry from relocInfo::relocType
// The relocation is recorded at the current instruction mark; 'format'
// selects the relocation format (e.g. RELOC_IMM32 vs RELOC_DISP32).
void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
        int format) {
  cbuf.relocate(cbuf.inst_mark(), reloc, format);

  *((int *)(cbuf.code_end())) = d32;
  cbuf.set_code_end(cbuf.code_end() + 4);
}
376
// emit 32 bit value and construct relocation entry from RelocationHolder
// Same as above, but the relocation spec carries its own type; for oop
// relocations the debug build checks the embedded value really is a
// permanent oop (non-perm oops could be moved by GC under the code).
void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
        int format) {
#ifdef ASSERT
  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
    assert(oop(d32)->is_oop() && oop(d32)->is_perm(), "cannot embed non-perm oops in code");
  }
#endif
  cbuf.relocate(cbuf.inst_mark(), rspec, format);

  *((int *)(cbuf.code_end())) = d32;
  cbuf.set_code_end(cbuf.code_end() + 4);
}
390
// Access stack slot for load or store
// Emits opcode + ModRM + SIB + displacement addressing [ESP+disp].
// ESP as a base always requires a SIB byte; the 8-bit displacement form
// is used when disp fits in a signed byte, otherwise the 32-bit form.
void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
  if( -128 <= disp && disp <= 127 ) {
    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte: mod=01 (disp8)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte: scale=1, no index, base=ESP
    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
  } else {
    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte: mod=10 (disp32)
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte: scale=1, no index, base=ESP
    emit_d32(cbuf, disp);     // Displacement  // R/M byte
  }
}
404
   // eRegI ereg, memory mem) %{    // emit_reg_mem
// Emit the ModRM byte (and SIB byte, if required) plus displacement for a
// register+memory operand.
//   reg_encoding    - value for the ModRM /reg field (register number or
//                     opcode extension)
//   base/index/scale- address components; index == 0x4 means "no index"
//                     (ESP cannot be an index register)
//   displace        - displacement; base == -1 selects absolute addressing
//   displace_is_oop - displacement embeds an oop and needs a relocation
//                     entry, which forces the 32-bit displacement form
void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, bool displace_is_oop ) {
  // There is no index & no scale, use form without SIB byte
  if ((index == 0x4) &&
      (scale == 0) && (base != ESP_enc)) {
    // If no displacement, mode is 0x0; unless base is [EBP]
    // (mod=00 with base EBP means absolute addressing, so EBP needs disp8/32)
    if ( (displace == 0) && (base != EBP_enc) ) {
      emit_rm(cbuf, 0x0, reg_encoding, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        if (base == -1) { // Special flag for absolute address
          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
          // (manual lies; no SIB needed here)
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
        else {                // Normal base + offset
          emit_rm(cbuf, 0x2, reg_encoding, base);
          if ( displace_is_oop ) {
            emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
          } else {
            emit_d32      (cbuf, displace);
          }
        }
      }
    }
  }
  else {                      // Else, encode with the SIB byte
    // If no displacement, mode is 0x0; unless base is [EBP]
    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
      emit_rm(cbuf, scale, index, base);
    }
    else {                    // If 8-bit displacement, mode 0x1
      if ((displace >= -128) && (displace <= 127)
          && !(displace_is_oop) ) {
        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
        emit_rm(cbuf, scale, index, base);
        emit_d8(cbuf, displace);
      }
      else {                  // If 32-bit displacement
        // NOTE(review): both arms below emit identical bytes (base == 0x04
        // makes the two calls the same) -- the branch looks redundant.
        if (base == 0x04 ) {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, 0x04);
        } else {
          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
          emit_rm(cbuf, scale, index, base);
        }
        if ( displace_is_oop ) {
          emit_d32_reloc(cbuf, displace, relocInfo::oop_type, 1);
        } else {
          emit_d32      (cbuf, displace);
        }
      }
    }
  }
}
471
472
473void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
474  if( dst_encoding == src_encoding ) {
475    // reg-reg copy, use an empty encoding
476  } else {
477    emit_opcode( cbuf, 0x8B );
478    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
479  }
480}
481
482void encode_CopyXD( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
483  if( dst_encoding == src_encoding ) {
484    // reg-reg copy, use an empty encoding
485  } else {
486    MacroAssembler _masm(&cbuf);
487
488    __ movdqa(as_XMMRegister(dst_encoding), as_XMMRegister(src_encoding));
489  }
490}
491
492
//=============================================================================
#ifndef PRODUCT
// Debug printing of the method prolog.  Mirrors the byte sequence produced
// by MachPrologNode::emit below; keep the two in sync.
void MachPrologNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile* C = ra_->C;
  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW  24 bit fpu control word");
    st->print_cr(""); st->print("\t");
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    st->print_cr("# stack bang"); st->print("\t");
  }
  st->print_cr("PUSHL  EBP"); st->print("\t");

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    st->print("PUSH   0xBADB100D\t# Majik cookie for stack depth check");
    st->print_cr(""); st->print("\t");
    framesize -= wordSize;
  }

  // Same framesize<128 split as emit(): 8-bit vs 32-bit SUB immediate form.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      st->print("SUB    ESP,%d\t# Create frame",framesize);
    }
  } else {
    st->print("SUB    ESP,%d\t# Create frame",framesize);
  }
}
#endif
532
533
// Emit the method prolog: optional fldcw (24-bit FPU mode), stack bang,
// push of EBP, optional stack-depth cookie, and frame allocation.
// The instruction sequence is constrained by NativeJump patching (see the
// WARNING below) -- do not reorder or shorten it.
void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile* C = ra_->C;

  if (UseSSE >= 2 && VerifyFPU) {
    MacroAssembler masm(&cbuf);
    masm.verify_FPU(0, "FPU stack must be clean on entry");
  }

  // WARNING: Initial instruction MUST be 5 bytes or longer so that
  // NativeJump::patch_verified_entry will be able to patch out the entry
  // code safely. The fldcw is ok at 6 bytes, the push to verify stack
  // depth is ok at 5 bytes, the frame allocation can be either 3 or
  // 6 bytes. So if we don't do the fldcw or the push then we must
  // use the 6 byte frame allocation even if we have no frame. :-(
  // If method sets FPU control word do it now
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Calls to C2R adapters often do not accept exceptional returns.
  // We require that their callers must bang for them.  But be careful, because
  // some VM calls (such as call site linkage) can use several kilobytes of
  // stack.  But the stack safety zone should account for that.
  // See bugs 4446381, 4468289, 4497237.
  if (C->need_stack_bang(framesize)) {
    MacroAssembler masm(&cbuf);
    masm.generate_stack_overflow_check(framesize);
  }

  // We always push rbp, so that on return to interpreter rbp, will be
  // restored correctly and we can correct the stack.
  emit_opcode(cbuf, 0x50 | EBP_enc);   // PUSH EBP (one-byte push r32 form)

  if( VerifyStackAtCalls ) { // Majik cookie to verify stack depth
    emit_opcode(cbuf, 0x68); // push 0xbadb100d
    emit_d32(cbuf, 0xbadb100d);
    framesize -= wordSize;
  }

  // Frame allocation.  The short (imm8) form is only safe when a >=5-byte
  // instruction has already been emitted (fldcw or cookie push); otherwise
  // the 6-byte imm32 form is forced -- see WARNING above.
  if ((C->in_24_bit_fp_mode() || VerifyStackAtCalls ) && framesize < 128 ) {
    if (framesize) {
      emit_opcode(cbuf, 0x83);   // sub  SP,#framesize
      emit_rm(cbuf, 0x3, 0x05, ESP_enc);
      emit_d8(cbuf, framesize);
    }
  } else {
    emit_opcode(cbuf, 0x81);   // sub  SP,#framesize
    emit_rm(cbuf, 0x3, 0x05, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  C->set_frame_complete(cbuf.code_end() - cbuf.code_begin());

#ifdef ASSERT
  if (VerifyStackAtCalls) {
    Label L;
    MacroAssembler masm(&cbuf);
    // Check ESP alignment: (ESP & (align-1)) should equal align-wordSize
    // here because the return address has been pushed by the caller.
    masm.push(rax);
    masm.mov(rax, rsp);
    masm.andptr(rax, StackAlignmentInBytes-1);
    masm.cmpptr(rax, StackAlignmentInBytes-wordSize);
    masm.pop(rax);
    masm.jcc(Assembler::equal, L);
    masm.stop("Stack is not properly aligned!");
    masm.bind(L);
  }
#endif

}
608
// Size of the prolog in bytes; delegated to the generic path because the
// emitted sequence depends on too many flags to compute cheaply here.
uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
  return MachNode::size(ra_); // too many variables; just compute it the hard way
}
612
// Upper bound on relocation entries the prolog may need.
int MachPrologNode::reloc() const {
  return 0; // a large enough number
}
616
//=============================================================================
#ifndef PRODUCT
// Debug printing of the method epilog.  Mirrors the byte sequence produced
// by MachEpilogNode::emit below; keep the two in sync.
void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  Compile *C = ra_->C;
  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  if( C->in_24_bit_fp_mode() ) {
    st->print("FLDCW  standard control word");
    st->cr(); st->print("\t");
  }
  if( framesize ) {
    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
    st->cr(); st->print("\t");
  }
  st->print_cr("POPL   EBP"); st->print("\t");
  if( do_polling() && C->is_method_compilation() ) {
    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
    st->cr(); st->print("\t");
  }
}
#endif
641
// Emit the method epilog: optional fldcw restore, frame deallocation,
// pop of EBP, and the safepoint return poll.  Byte counts here must stay
// in sync with MachEpilogNode::size below.
void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;

  // If method set FPU control word, restore to standard control word
  if( C->in_24_bit_fp_mode() ) {
    MacroAssembler masm(&cbuf);
    masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
  }

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here

  if( framesize >= 128 ) {
    emit_opcode(cbuf, 0x81); // add  SP, #framesize   (imm32 form, 6 bytes)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d32(cbuf, framesize);
  }
  else if( framesize ) {
    emit_opcode(cbuf, 0x83); // add  SP, #framesize   (imm8 form, 3 bytes)
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, framesize);
  }

  emit_opcode(cbuf, 0x58 | EBP_enc);   // POP EBP (one-byte pop r32 form)

  if( do_polling() && C->is_method_compilation() ) {
    // Safepoint poll: TEST EAX,[polling_page] with an absolute address;
    // the relocation lets the VM find and arm/patch the poll.
    cbuf.relocate(cbuf.code_end(), relocInfo::poll_return_type, 0);
    emit_opcode(cbuf,0x85);
    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
    emit_d32(cbuf, (intptr_t)os::get_polling_page());
  }
}
678
// Exact byte size of the epilog.  Each term must match the bytes emitted
// by MachEpilogNode::emit above: 6 for fldcw, 6 for the poll, 1 for the
// pop, and 6 (imm32) or 3 (imm8) for the ADD ESP.
uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
  Compile *C = ra_->C;
  // If method set FPU control word, restore to standard control word
  int size = C->in_24_bit_fp_mode() ? 6 : 0;
  if( do_polling() && C->is_method_compilation() ) size += 6;

  int framesize = C->frame_slots() << LogBytesPerInt;
  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
  // Remove two words for return addr and rbp,
  framesize -= 2*wordSize;

  size++; // popl rbp,

  if( framesize >= 128 ) {
    size += 6;
  } else {
    size += framesize ? 3 : 0;
  }
  return size;
}
699
// Upper bound on relocation entries the epilog may need.
int MachEpilogNode::reloc() const {
  return 0; // a large enough number
}
703
// Epilog uses the generic machine-node pipeline description.
const Pipeline * MachEpilogNode::pipeline() const {
  return MachNode::pipeline_class();
}
707
// Offset of the safepoint (return poll) within the epilog sequence.
int MachEpilogNode::safepoint_offset() const { return 0; }
709
710//=============================================================================
711
// Register classes used by the spill-copy helpers below.
enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
// Classify an allocator register name: invalid, stack slot, integer
// register, x87 float register, or XMM register.  Check order matters:
// stack slots must be filtered out before converting to a VMReg.
static enum RC rc_class( OptoReg::Name reg ) {

  if( !OptoReg::is_valid(reg)  ) return rc_bad;
  if (OptoReg::is_stack(reg)) return rc_stack;

  VMReg r = OptoReg::as_VMReg(reg);
  if (r->is_Register()) return rc_int;
  if (r->is_FloatRegister()) {
    // x87 registers are only allocated when SSE2 is unavailable.
    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
    return rc_float;
  }
  assert(r->is_XMMRegister(), "must be");
  return rc_xmm;
}
727
// Spill-copy helper: emit (cbuf != NULL), format (cbuf == NULL, !do_size),
// or just size (do_size) a single load/store between a register and an
// [ESP + offset] stack slot.  Returns the accumulated size in bytes.
static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
                        int opcode, const char *op_str, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode  (*cbuf, opcode );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
    } else { // FLD, FST, PUSH, POP
      st->print("%s [ESP + #%d]",op_str,offset);
    }
#endif
  }
  // 3 bytes = opcode + ModRM + SIB, plus 0/1/4 displacement bytes.
  // NOTE(review): this sizing assumes offset >= 0 (offsets in (-129, 0)
  // would also be 1 byte, and <= -129 would be 4) -- stack slot offsets
  // are presumably non-negative, so the <= 127 test suffices; confirm.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+3+offset_size;
}
747
// Helper for XMM registers.  Extra opcode bits, limited syntax.
// Like impl_helper, but moves between an XMM register and an [ESP + offset]
// slot.  reg_lo+1 == reg_hi identifies a double (64-bit) move; the prefix
// byte selects MOVLPD (0x66), MOVSD (0xF2), or MOVSS (0xF3).
static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
  if( cbuf ) {
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load && !UseXmmLoadAndClearUpper )
        emit_opcode(*cbuf, 0x66 ); // use 'movlpd' for load
      else
        emit_opcode(*cbuf, 0xF2 ); // use 'movsd' otherwise
    } else {
      emit_opcode(*cbuf, 0xF3 );
    }
    emit_opcode(*cbuf, 0x0F );
    if( reg_lo+1 == reg_hi && is_load && !UseXmmLoadAndClearUpper )
      emit_opcode(*cbuf, 0x12 );   // use 'movlpd' for load
    else
      emit_opcode(*cbuf, is_load ? 0x10 : 0x11 );
    encode_RegMem(*cbuf, Matcher::_regEncode[reg_lo], ESP_enc, 0x4, 0, offset, false);
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    if( reg_lo+1 == reg_hi ) { // double move?
      if( is_load ) st->print("%s %s,[ESP + #%d]",
                               UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSD  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    } else {
      if( is_load ) st->print("MOVSS  %s,[ESP + #%d]",
                               Matcher::regName[reg_lo], offset);
      else          st->print("MOVSS  [ESP + #%d],%s",
                               offset, Matcher::regName[reg_lo]);
    }
#endif
  }
  // 5 bytes = prefix + 0x0F + opcode + ModRM + SIB, plus displacement;
  // same non-negative-offset assumption as impl_helper above.
  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
  return size+5+offset_size;
}
786
787
// Emit/size/print an XMM-to-XMM register copy.  With UseXmmRegToRegMoveAll the
// full-register MOVAPS/MOVAPD forms are used (0x0F 0x28, plus 0x66 prefix for
// the double case); otherwise the scalar MOVSS/MOVSD forms (0xF3/0xF2 0x0F
// 0x10).  A "double move" is indicated by adjacent hi/lo register halves.
// Returns the running 'size' plus this instruction's length.
static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
                            int src_hi, int dst_hi, int size, outputStream* st ) {
  if( UseXmmRegToRegMoveAll ) {//Use movaps,movapd to move between xmm registers
    if( cbuf ) {
      if( (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ) {
        emit_opcode(*cbuf, 0x66 );
      }
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x28 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // MOVAPD carries the extra 0x66 prefix byte; MOVAPS does not.
    return size + ((src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 4 : 3);
  } else {
    if( cbuf ) {
      emit_opcode(*cbuf, (src_lo+1 == src_hi && dst_lo+1 == dst_hi) ? 0xF2 : 0xF3 );
      emit_opcode(*cbuf, 0x0F );
      emit_opcode(*cbuf, 0x10 );
      emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst_lo], Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      } else {
        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
      }
#endif
    }
    // prefix(1) + 0x0F(1) + opcode(1) + ModRM(1)
    return size+4;
  }
}
828
// Emit/size/print an integer register-to-register MOV (opcode 0x8B).
// Returns the running 'size' plus 2 bytes (opcode + ModRM).
static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
  if( cbuf ) {
    emit_opcode(*cbuf, 0x8B );
    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
#ifndef PRODUCT
  } else if( !do_size ) {
    if( size != 0 ) st->print("\n\t");
    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
#endif
  }
  return size+2;
}
841
// Store an x87 floating point register to [ESP + offset].  If the source is
// not already FPR1 (top of the FP stack), it is first pushed with FLD and then
// stored-and-popped (FSTP); otherwise a plain FST leaves the stack unchanged.
// st_op selects the ModRM opcode-extension field: /3 (FSTP) vs /2 (FST),
// expressed via the register-number constants EBX_num/EDX_num.
static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
                                 int offset, int size, outputStream* st ) {
  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
    if( cbuf ) {
      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
      // -1 compensates for the 0/1 indexing of the FPR register encodes.
      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("FLD    %s",Matcher::regName[src_lo]);
#endif
    }
    size += 2;
  }

  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
  const char *op_str;
  int op;
  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
    op = 0xDD;
  } else {                   // 32-bit store
    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
    op = 0xD9;
    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
  }

  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
}
871
// Single worker behind MachSpillCopyNode::format()/emit()/size().  Exactly one
// mode is active per call:
//   cbuf != NULL             - emit machine code into cbuf
//   cbuf == NULL && do_size  - compute the code size only
//   cbuf == NULL && !do_size - print assembly to 'st' (compiled out in PRODUCT)
// In all modes the byte length of the generated copy sequence is returned.
// Copies are classified by the register classes (int/float/xmm/stack) of the
// first and (for longs/doubles) second halves of the source and destination.
uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
  // Get registers to move
  OptoReg::Name src_second = ra_->get_reg_second(in(1));
  OptoReg::Name src_first = ra_->get_reg_first(in(1));
  OptoReg::Name dst_second = ra_->get_reg_second(this );
  OptoReg::Name dst_first = ra_->get_reg_first(this );

  enum RC src_second_rc = rc_class(src_second);
  enum RC src_first_rc = rc_class(src_first);
  enum RC dst_second_rc = rc_class(dst_second);
  enum RC dst_first_rc = rc_class(dst_first);

  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );

  // Generate spill code!
  int size = 0;

  if( src_first == dst_first && src_second == dst_second )
    return size;            // Self copy, no move

  // --------------------------------------
  // Check for mem-mem move.  push/pop to move.
  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
    // If the ranges overlap, the second half must be moved first or the low
    // half's POP would clobber it.
    if( src_second == dst_first ) { // overlapping stack copy ranges
      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
    }
    // move low bits
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
    }
    return size;
  }

  // --------------------------------------
  // Check for integer reg-reg copy
  if( src_first_rc == rc_int && dst_first_rc == rc_int )
    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);

  // Check for integer store
  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);

  // Check for integer load
  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);

  // --------------------------------------
  // Check for float reg-reg copy
  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
    if( cbuf ) {

      // Note the mucking with the register encode to compensate for the 0/1
      // indexing issue mentioned in a comment in the reg_def sections
      // for FPR registers many lines above here.

      if( src_first != FPR1L_num ) {
        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
     } else {
        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
     }
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
#endif
    }
    // FLD+FSTP is two 2-byte instructions; FST alone is one.
    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
  }

  // Check for float store
  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
  }

  // Check for float load
  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
    int offset = ra_->reg2offset(src_first);
    const char *op_str;
    int op;
    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
      op_str = "FLD_D";
      op = 0xDD;
    } else {                   // 32-bit load
      op_str = "FLD_S";
      op = 0xD9;
      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
    }
    if( cbuf ) {
      emit_opcode  (*cbuf, op );
      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, false);
      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
#endif
    }
    // FLD (3 bytes + displacement) followed by FSTP (2 bytes).
    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
    return size + 3+offset_size+2;
  }

  // Check for xmm reg-reg copy
  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
            (src_first+1 == src_second && dst_first+1 == dst_second),
            "no non-adjacent float-moves" );
    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
  }

  // Check for xmm store
  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
  }

  // Check for float xmm load
  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
  }

  // Copy from float reg to xmm reg: bounce through a temporary stack slot
  // below ESP, adjusting ESP with LEA instead of SUB/ADD so EFLAGS survive.
  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
    // copy to the top of stack from floating point reg
    // and use LEA to preserve flags
    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0xF8);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP-8]");
#endif
    }
    size += 4;

    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);

    // Copy from the temp memory to the xmm reg.
    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);

    if( cbuf ) {
      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
      emit_d8(*cbuf,0x08);
#ifndef PRODUCT
    } else if( !do_size ) {
      if( size != 0 ) st->print("\n\t");
      st->print("LEA    ESP,[ESP+8]");
#endif
    }
    size += 4;
    return size;
  }

  // Every float/xmm combination returned above; only int/stack first-half
  // cases fall through to the second-half handling below.
  assert( size > 0, "missed a case" );

  // --------------------------------------------------------------------
  // Check for second bits still needing moving.
  if( src_second == dst_second )
    return size;               // Self copy; no move
  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );

  // Check for second word int-int move
  if( src_second_rc == rc_int && dst_second_rc == rc_int )
    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);

  // Check for second word integer store
  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);

  // Check for second word integer load
  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);


  // No applicable copy pattern: unsupported register-class combination.
  Unimplemented();
}
1065
#ifndef PRODUCT
// Pretty-print only: NULL CodeBuffer makes implementation() print to 'st'.
void MachSpillCopyNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  implementation( NULL, ra_, false, st );
}
#endif

// Emit the spill/fill/copy machine code into the code buffer.
void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  implementation( &cbuf, ra_, false, NULL );
}

// Size in bytes: run implementation() in sizing-only mode (do_size == true).
uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
  return implementation( NULL, ra_, true, NULL );
}
1079
1080//=============================================================================
#ifndef PRODUCT
// Print the nop padding with its byte count.
void MachNopNode::format( PhaseRegAlloc *, outputStream* st ) const {
  st->print("NOP \t# %d bytes pad for loops and calls", _count);
}
#endif

// Emit '_count' bytes of nop padding.
void MachNopNode::emit(CodeBuffer &cbuf, PhaseRegAlloc * ) const {
  MacroAssembler _masm(&cbuf);
  __ nop(_count);
}

// One byte per nop.
uint MachNopNode::size(PhaseRegAlloc *) const {
  return _count;
}
1095
1096
1097//=============================================================================
1098#ifndef PRODUCT
1099void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1100  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1101  int reg = ra_->get_reg_first(this);
1102  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1103}
1104#endif
1105
1106void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1107  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1108  int reg = ra_->get_encode(this);
1109  if( offset >= 128 ) {
1110    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1111    emit_rm(cbuf, 0x2, reg, 0x04);
1112    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1113    emit_d32(cbuf, offset);
1114  }
1115  else {
1116    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1117    emit_rm(cbuf, 0x1, reg, 0x04);
1118    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1119    emit_d8(cbuf, offset);
1120  }
1121}
1122
1123uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1124  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1125  if( offset >= 128 ) {
1126    return 7;
1127  }
1128  else {
1129    return 4;
1130  }
1131}
1132
1133//=============================================================================
1134
// emit call stub, compiled java to interpreter
void emit_java_to_interp(CodeBuffer &cbuf ) {
  // Stub is fixed up when the corresponding call is converted from calling
  // compiled code to calling interpreted code.
  // Shape of the stub:
  // mov rbx,0
  // jmp -1

  address mark = cbuf.inst_mark();  // get mark within main instrs section

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a stub.
  MacroAssembler _masm(&cbuf);

  address base =
  __ start_a_stub(Compile::MAX_stubs_size);
  if (base == NULL)  return;  // CodeBuffer::expand failed
  // static stub relocation stores the instruction address of the call
  __ relocate(static_stub_Relocation::spec(mark), RELOC_IMM32);
  // static stub relocation also tags the methodOop in the code-stream.
  __ movoop(rbx, (jobject)NULL);  // method is zapped till fixup time
  // This is recognized as unresolved by relocs/nativeInst/ic code
  __ jump(RuntimeAddress(__ pc()));

  __ end_a_stub();
  // Update current stubs pointer and restore code_end.
}
// size of call stub, compiled java to interpreter
uint size_java_to_interp() {
  return 10;  // movl; jmp
}
// relocation entries for call stub, compiled java to interpreter
uint reloc_java_to_interp() {
  return 4;  // 3 in emit_java_to_interp + 1 in Java_Static_Call
}
1169
1170//=============================================================================
#ifndef PRODUCT
// Print the unverified entry point: inline cache check plus alignment nops
// (mirrors the code emitted by MachUEPNode::emit below).
void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
  st->print_cr("\tNOP");
  st->print_cr("\tNOP");
  if( !OptoBreakpoint )
    st->print_cr("\tNOP");
}
#endif
1181
// Unverified entry point: compare the expected klass (in EAX) against the
// receiver's klass (loaded from ECX) and jump to the inline-cache miss stub
// on mismatch, then pad with nops.
void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
  MacroAssembler masm(&cbuf);
#ifdef ASSERT
  uint code_size = cbuf.code_size();
#endif
  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
  masm.jump_cc(Assembler::notEqual,
               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
  /* WARNING these NOPs are critical so that verified entry point is properly
     aligned for patching by NativeJump::patch_verified_entry() */
  int nops_cnt = 2;
  if( !OptoBreakpoint ) // Leave space for int3
     nops_cnt += 1;
  masm.nop(nops_cnt);

  assert(cbuf.code_size() - code_size == size(ra_), "checking code size of inline cache node");
}
1199
1200uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1201  return OptoBreakpoint ? 11 : 12;
1202}
1203
1204
1205//=============================================================================
uint size_exception_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  return NativeJump::instruction_size;
}
1214
// Emit exception handler code.  Stuff framesize into a register
// and call a VM stub routine.
// Returns the offset of the handler within the stub section,
// or 0 if the stub could not be allocated.
int emit_exception_handler(CodeBuffer& cbuf) {

  // Note that the code buffer's inst_mark is always relative to insts.
  // That's why we must use the macroassembler to generate a handler.
  MacroAssembler _masm(&cbuf);
  address base =
  __ start_a_stub(size_exception_handler());
  if (base == NULL)  return 0;  // CodeBuffer::expand failed
  int offset = __ offset();
  // Jump to the shared exception blob; patchable to a call (see 4932387).
  __ jump(RuntimeAddress(OptoRuntime::exception_blob()->instructions_begin()));
  assert(__ offset() - offset <= (int) size_exception_handler(), "overflow");
  __ end_a_stub();
  return offset;
}
1231
uint size_deopt_handler() {
  // NativeCall instruction size is the same as NativeJump.
  // exception handler starts out as jump and can be patched to
  // a call by deoptimization.  (4932387)
  // Note that this value is also credited (in output.cpp) to
  // the size of the code section.
  // The deopt handler is a pushl (5 bytes) followed by a jump.
  return 5 + NativeJump::instruction_size; // pushl(); jmp;
}
1240
1241// Emit deopt handler code.
1242int emit_deopt_handler(CodeBuffer& cbuf) {
1243
1244  // Note that the code buffer's inst_mark is always relative to insts.
1245  // That's why we must use the macroassembler to generate a handler.
1246  MacroAssembler _masm(&cbuf);
1247  address base =
1248  __ start_a_stub(size_exception_handler());
1249  if (base == NULL)  return 0;  // CodeBuffer::expand failed
1250  int offset = __ offset();
1251  InternalAddress here(__ pc());
1252  __ pushptr(here.addr());
1253
1254  __ jump(RuntimeAddress(SharedRuntime::deopt_blob()->unpack()));
1255  assert(__ offset() - offset <= (int) size_deopt_handler(), "overflow");
1256  __ end_a_stub();
1257  return offset;
1258}
1259
1260
// Place 'x' in the constant area and emit a 4-byte, relocated reference to
// its address at the current code position.  The instruction mark is saved
// and restored because materializing the constant shifts the buffer.
static void emit_double_constant(CodeBuffer& cbuf, double x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address double_address = __ double_constant(x);
  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)double_address,
                 internal_word_Relocation::spec(double_address),
                 RELOC_DISP32);
}

// Float analogue of emit_double_constant: emit a relocated 4-byte reference
// to a float constant placed in the constant area.
static void emit_float_constant(CodeBuffer& cbuf, float x) {
  int mark = cbuf.insts()->mark_off();
  MacroAssembler _masm(&cbuf);
  address float_address = __ float_constant(x);
  cbuf.insts()->set_mark_off(mark);  // preserve mark across masm shift
  emit_d32_reloc(cbuf,
                 (int)float_address,
                 internal_word_Relocation::spec(float_address),
                 RELOC_DISP32);
}
1282
1283
// Translate an allocator register number into an FPU register offset.
int Matcher::regnum_to_fpu_offset(int regnum) {
  return regnum - 32; // The FP registers are in the second chunk
}
1287
// Exact bit-pattern compare against +0.0f: unlike f == 0.0F this
// distinguishes +0.0 from -0.0 (and never matches NaN).
bool is_positive_zero_float(jfloat f) {
  return jint_cast(f) == jint_cast(0.0F);
}

// Exact bit-pattern compare against 1.0f.
bool is_positive_one_float(jfloat f) {
  return jint_cast(f) == jint_cast(1.0F);
}

// Exact bit-pattern compare against +0.0 (distinguishes -0.0).
bool is_positive_zero_double(jdouble d) {
  return jlong_cast(d) == jlong_cast(0.0);
}

// Exact bit-pattern compare against 1.0.
bool is_positive_one_double(jdouble d) {
  return jlong_cast(d) == jlong_cast(1.0);
}
1303
// Shared Matcher hook (the "UltraSparc specific" wording found in other ports
// is historical); true just means we have fast l2f conversion
const bool Matcher::convL2FSupported(void) {
  return true;
}
1308
1309// Vector width in bytes
1310const uint Matcher::vector_width_in_bytes(void) {
1311  return UseSSE >= 2 ? 8 : 0;
1312}
1313
// Vector ideal reg: vectors live in double registers.
const uint Matcher::vector_ideal_reg(void) {
  return Op_RegD;
}
1318
1319// Is this branch offset short enough that a short branch can be used?
1320//
1321// NOTE: If the platform does not provide any short branch variants, then
1322//       this method should return false for offset 0.
1323bool Matcher::is_short_branch_offset(int rule, int offset) {
1324  // the short version of jmpConUCF2 contains multiple branches,
1325  // making the reach slightly less
1326  if (rule == jmpConUCF2_rule)
1327    return (-126 <= offset && offset <= 125);
1328  return (-128 <= offset && offset <= 127);
1329}
1330
// On 32-bit x86 a 64-bit store of a constant is never cheaper than two
// 32-bit stores, so always answer no.
const bool Matcher::isSimpleConstant64(jlong value) {
  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
  return false;
}
1335
// Platform tuning flags consumed by the Matcher.

// The ecx parameter to rep stos for the ClearArray node is in dwords.
const bool Matcher::init_array_count_is_in_bytes = false;

// Threshold size for cleararray.
const int Matcher::init_array_short_size = 8 * BytesPerLong;

// Should the Matcher clone shifts on addressing modes, expecting them to
// be subsumed into complex addressing expressions or compute them into
// registers?  True for Intel but false for most RISCs
const bool Matcher::clone_shift_expressions = true;

// Is it better to copy float constants, or load them directly from memory?
// Intel can load a float constant from a direct address, requiring no
// extra registers.  Most RISCs will have to materialize an address into a
// register first, so they would do better to copy the constant from stack.
const bool Matcher::rematerialize_float_constants = true;

// If CPU can load and store mis-aligned doubles directly then no fixup is
// needed.  Else we split the double into 2 integer pieces and move it
// piece-by-piece.  Only happens when passing doubles into C code as the
// Java calling convention forces doubles to be aligned.
const bool Matcher::misaligned_doubles_ok = true;
1358
1359
// Replace the memory operand that input edge 'idx' of 'node' belongs to with
// its *_win95_safeOper variant (presumably so the implicit-null-check
// addressing mode is safe on Win95-style systems -- see the corresponding
// operand definitions; TODO confirm against those definitions).
void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
  // Get the memory operand from the node
  uint numopnds = node->num_opnds();        // Virtual call for number of operands
  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
  uint opcnt     = 1;                 // First operand
  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
  // Walk operands until we find the one covering input edge 'idx'.
  while( idx >= skipped+num_edges ) {
    skipped += num_edges;
    opcnt++;                          // Bump operand count
    assert( opcnt < numopnds, "Accessing non-existent operand" );
    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
  }

  MachOper *memory = node->_opnds[opcnt];
  MachOper *new_memory = NULL;
  switch (memory->opcode()) {
  case DIRECT:
  case INDOFFSET32X:
    // No transformation necessary.
    return;
  case INDIRECT:
    new_memory = new (C) indirect_win95_safeOper( );
    break;
  case INDOFFSET8:
    new_memory = new (C) indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDOFFSET32:
    new_memory = new (C) indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXOFFSET:
    new_memory = new (C) indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
    break;
  case INDINDEXSCALE:
    new_memory = new (C) indIndexScale_win95_safeOper(memory->scale());
    break;
  case INDINDEXSCALEOFFSET:
    new_memory = new (C) indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
    break;
  case LOAD_LONG_INDIRECT:
  case LOAD_LONG_INDOFFSET32:
    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
    return;
  default:
    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
    return;
  }
  node->_opnds[opcnt] = new_memory;
}
1409
// Advertise here if the CPU requires explicit rounding operations
// to implement the UseStrictFP mode.  (True on x86: the x87 unit computes
// at extended precision.)
const bool Matcher::strict_fp_requires_explicit_rounding = true;

// Do floats take an entire double register or just half?
const bool Matcher::float_in_double = true;
// Do ints take an entire long register or just half?
const bool Matcher::int_in_long = false;
1418
1419// Return whether or not this register is ever used as an argument.  This
1420// function is used on startup to build the trampoline stubs in generateOptoStub.
1421// Registers not mentioned will be killed by the VM call in the trampoline, and
1422// arguments in those registers not be available to the callee.
1423bool Matcher::can_be_java_arg( int reg ) {
1424  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1425  if( (reg == XMM0a_num || reg == XMM1a_num) && UseSSE>=1 ) return true;
1426  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1427  return false;
1428}
1429
// A register is spillable as an argument exactly when it can carry a Java arg.
bool Matcher::is_spillable_arg( int reg ) {
  return can_be_java_arg(reg);
}
1433
// Register for DIVI projection of divmodI
// (x86 idiv leaves the quotient in EAX)
RegMask Matcher::divI_proj_mask() {
  return EAX_REG_mask;
}

// Register for MODI projection of divmodI
// (x86 idiv leaves the remainder in EDX)
RegMask Matcher::modI_proj_mask() {
  return EDX_REG_mask;
}

// Register for DIVL projection of divmodL
// (never requested on 32-bit x86, hence unreachable)
RegMask Matcher::divL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}

// Register for MODL projection of divmodL
// (never requested on 32-bit x86, hence unreachable)
RegMask Matcher::modL_proj_mask() {
  ShouldNotReachHere();
  return RegMask();
}
1455
1456%}
1457
1458//----------ENCODING BLOCK-----------------------------------------------------
1459// This block specifies the encoding classes used by the compiler to output
1460// byte streams.  Encoding classes generate functions which are called by
1461// Machine Instruction Nodes in order to generate the bit encoding of the
1462// instruction.  Operands specify their base encoding interface with the
// interface keyword.  There are currently four supported interfaces:
1464// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1465// operand to generate a function which returns its register number when
1466// queried.   CONST_INTER causes an operand to generate a function which
1467// returns the value of the constant when queried.  MEMORY_INTER causes an
1468// operand to generate four functions which return the Base Register, the
1469// Index Register, the Scale Value, and the Offset Value of the operand when
1470// queried.  COND_INTER causes an operand to generate six functions which
1471// return the encoding code (ie - encoding bits for the instruction)
1472// associated with each basic boolean condition for a conditional instruction.
1473// Instructions specify two basic values for encoding.  They use the
1474// ins_encode keyword to specify their encoding class (which must be one of
1475// the class names specified in the encoding block), and they use the
1476// opcode keyword to specify, in order, their primary, secondary, and
1477// tertiary opcode.  Only the opcode sections which a particular instruction
1478// needs for encoding need to be specified.
1479encode %{
1480  // Build emit functions for each basic byte or larger field in the intel
1481  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1482  // code in the enc_class source block.  Emit functions will live in the
1483  // main source block for now.  In future, we can generalize this by
1484  // adding a syntax that specifies the sizes of fields in an order,
1485  // so that the adlc can build the emit functions automagically
  enc_class OpcP %{             // Emit primary opcode
    emit_opcode(cbuf,$primary);
  %}

  enc_class OpcS %{             // Emit secondary opcode
    emit_opcode(cbuf,$secondary);
  %}

  enc_class Opcode(immI d8 ) %{ // Emit the opcode byte given as an operand
    emit_opcode(cbuf,$d8$$constant);
  %}

  enc_class SizePrefix %{       // Emit 0x66 operand-size override prefix
    emit_opcode(cbuf,0x66);
  %}

  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    // ModRM with mod=11: register-direct dst,src
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class OpcRegReg (immI opcode, eRegI dst, eRegI src) %{    // OpcRegReg(Many)
    // Opcode byte followed by register-direct ModRM
    emit_opcode(cbuf,$opcode$$constant);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class mov_r32_imm0( eRegI dst ) %{
    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
  %}
1515
  enc_class cdq_enc %{
    // Full implementation of Java idiv and irem; checks for
    // special case as described in JVM spec., p.243 & p.271.
    //
    //         normal case                           special case
    //
    // input : rax,: dividend                         min_int
    //         reg: divisor                          -1
    //
    // output: rax,: quotient  (= rax, idiv reg)       min_int
    //         rdx: remainder (= rax, irem reg)       0
    //
    //  Code sequence:
    //
    //  81 F8 00 00 00 80    cmp         rax,80000000h
    //  0F 85 0B 00 00 00    jne         normal_case
    //  33 D2                xor         rdx,edx
    //  83 F9 FF             cmp         rcx,0FFh
    //  0F 84 03 00 00 00    je          done
    //                  normal_case:
    //  99                   cdq
    //  F7 F9                idiv        rax,ecx
    //                  done:
    //
    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
    // normal_case:
    emit_opcode(cbuf,0x99);                                         // cdq
    // idiv (note: must be emitted by the user of this rule)
    // normal:
  %}
1556
  // Dense encoding for older common ops
  // Folds the register number directly into the opcode byte.
  enc_class Opc_plus(immI opcode, eRegI reg) %{
    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
  %}


  // Opcode enc_class for 8/32 bit immediate instructions with sign-extension
  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);
    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
  %}
1573
  // Primary opcode (sign-extend bit set when the immediate fits in 8 bits)
  // followed by a ModRM byte whose reg field carries the $secondary sub-opcode.
  enc_class OpcSErm (eRegI dst, immI imm) %{    // OpcSEr/m
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      emit_opcode(cbuf, $primary | 0x02);    }
    else {                          // If 32-bit immediate
      emit_opcode(cbuf, $primary);
    }
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
  %}

  // Emit only the immediate value: 8 bits when it fits, else 32 bits.
  // The size choice must match the opcode emitted by OpcSE/OpcSErm.
  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
    // Check for 8-bit immediate, and set sign extend bit in opcode
    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
      $$$emit8$imm$$constant;
    }
    else {                          // If 32-bit immediate
      // Output immediate
      $$$emit32$imm$$constant;
    }
  %}
1596
  // Immediate op on the LOW word of a long: primary opcode (sign-extend
  // bit set for imm8), ModRM with the $secondary sub-opcode, then the low
  // 32 bits of the constant as imm8 or imm32.
  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)$imm$$constant; // Throw away top bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with secondary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}

  // Same for the HIGH word: uses the $tertiary sub-opcode and the register
  // holding the high half of the long pair.
  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
    // Emit primary opcode and set sign-extend bit
    // Check for 8-bit immediate, and set sign extend bit in opcode
    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
    // Emit r/m byte with tertiary opcode, after primary opcode.
    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
    else                               emit_d32(cbuf,con);
  %}
1618
  // 32-bit pc-relative displacement to a label (0 when not yet bound).
  enc_class Lbl (label labl) %{ // JMP, CALL
    Label *l = $labl$$label;
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // 8-bit pc-relative displacement; asserts the target is in short range.
  enc_class LblShort (label labl) %{ // JMP, CALL
    Label *l = $labl$$label;
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // Dense opcode+register encoding built from the $secondary opcode.
  enc_class OpcSReg (eRegI dst) %{    // BSWAP
    emit_cc(cbuf, $secondary, $dst$$reg );
  %}

  // Byte-swap a 64-bit value: BSWAP each half, then XCHG the two halves.
  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
    int destlo = $dst$$reg;
    int desthi = HIGH_FROM_LOW(destlo);
    // bswap lo
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, destlo);
    // bswap hi
    emit_opcode(cbuf, 0x0F);
    emit_cc(cbuf, 0xC8, desthi);
    // xchg lo and hi
    emit_opcode(cbuf, 0x87);
    emit_rm(cbuf, 0x3, destlo, desthi);
  %}

  // ModRM byte whose reg field is the $secondary sub-opcode; used for
  // one-operand instructions such as IDIV and indirect JMP.
  enc_class RegOpc (eRegI div) %{    // IDIV, IMOD, JMP indirect, ...
    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
  %}
1652
  // Long-form conditional jump: primary opcode byte, a second byte built
  // from $secondary plus the condition code, then a 32-bit displacement.
  enc_class Jcc (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    emit_d32(cbuf, l ? (l->loc_pos() - (cbuf.code_size()+4)) : 0);
  %}

  // Short-form conditional jump with an 8-bit displacement.
  enc_class JccShort (cmpOp cop, label labl) %{    // JCC
    Label *l = $labl$$label;
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.code_size()+1)) : 0;
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    emit_d8(cbuf, disp);
  %}

  // CMOVcc opcode bytes; the condition code is folded into the second byte.
  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
  %}

  // x87 conditional move: two-byte opcode computed from the condition code
  // and the FP-stack slot (slots are 1-based, hence the -1).
  enc_class enc_cmov_d(cmpOp cop, regD src ) %{ // CMOV
    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
    emit_d8(cbuf, op >> 8 );
    emit_d8(cbuf, op & 255);
  %}

  // emulate a CMOV with a conditional branch around a MOV
  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
    // Invert sense of branch from sense of CMOV
    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
    emit_d8( cbuf, $brOffs$$constant );
  %}
1685
  // Partial subtype check: scan the sub-klass's secondary-supers array for
  // the super-klass and cache a hit.  Register usage is fixed by the
  // matching instruct: EAX=super, ESI=sub, EDI=result/scan ptr, ECX=killed.
  enc_class enc_PartialSubtypeCheck( ) %{
    Register Redi = as_Register(EDI_enc); // result register
    Register Reax = as_Register(EAX_enc); // super class
    Register Recx = as_Register(ECX_enc); // killed
    Register Resi = as_Register(ESI_enc); // sub class
    Label hit, miss;

    MacroAssembler _masm(&cbuf);
    // Compare super with sub directly, since super is not in its own SSA.
    // The compiler used to emit this test, but we fold it in here,
    // to allow platform-specific tweaking on sparc.
    __ cmpptr(Reax, Resi);
    __ jcc(Assembler::equal, hit);
#ifndef PRODUCT
    __ incrementl(ExternalAddress((address)&SharedRuntime::_partial_subtype_ctr));
#endif //PRODUCT
    // Load the secondary supers array and scan it (REPNE SCAS) for super.
    __ movptr(Redi,Address(Resi,sizeof(oopDesc) + Klass::secondary_supers_offset_in_bytes()));
    __ movl(Recx,Address(Redi,arrayOopDesc::length_offset_in_bytes()));
    __ addptr(Redi,arrayOopDesc::base_offset_in_bytes(T_OBJECT));
    __ repne_scan();
    __ jcc(Assembler::notEqual, miss);
    // Found: update the one-element secondary-super cache.
    __ movptr(Address(Resi,sizeof(oopDesc) + Klass::secondary_super_cache_offset_in_bytes()),Reax);
    __ bind(hit);
    if( $primary )
      __ xorptr(Redi,Redi);   // on a hit, optionally zero the result register
    __ bind(miss);
  %}
1713
  // Make the x87 stack empty before a call.  With SSE2+ the FPU stack is
  // expected to already be empty (optionally verified); otherwise pop
  // everything so the C calling convention sees a clean FPU stack.
  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
    MacroAssembler masm(&cbuf);
    int start = masm.offset();
    if (UseSSE >= 2) {
      if (VerifyFPU) {
        masm.verify_FPU(0, "must be empty in SSE2+ mode");
      }
    } else {
      // External c_calling_convention expects the FPU stack to be 'clean'.
      // Compiled code leaves it dirty.  Do cleanup now.
      masm.empty_FPU_stack();
    }
    // Record the emitted size the first time, and assert that every later
    // instance of this encoding has exactly the same length.
    if (sizeof_FFree_Float_Stack_All == -1) {
      sizeof_FFree_Float_Stack_All = masm.offset() - start;
    } else {
      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
    }
  %}

  // Optionally verify the FPU stack state after a runtime leaf call.
  enc_class Verify_FPU_For_Leaf %{
    if( VerifyFPU ) {
      MacroAssembler masm(&cbuf);
      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
    }
  %}
1739
  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
    // This is the instruction starting address for relocation info.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    // CALL directly to the runtime
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                runtime_call_Relocation::spec(), RELOC_IMM32 );

    if (UseSSE >= 2) {
      MacroAssembler _masm(&cbuf);
      BasicType rt = tf()->return_type();

      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
        // A C runtime call where the return value is unused.  In SSE2+
        // mode the result needs to be removed from the FPU stack.  It's
        // likely that this function call could be removed by the
        // optimizer if the C function is a pure function.
        __ ffree(0);
      } else if (rt == T_FLOAT) {
        // The C runtime leaves the float result on the x87 stack; spill it
        // through a 4-byte stack slot into XMM0 where SSE2 code expects it.
        __ lea(rsp, Address(rsp, -4));
        __ fstp_s(Address(rsp, 0));
        __ movflt(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  4));
      } else if (rt == T_DOUBLE) {
        // Same for a double result, via an 8-byte stack slot.
        __ lea(rsp, Address(rsp, -8));
        __ fstp_d(Address(rsp, 0));
        __ movdbl(xmm0, Address(rsp, 0));
        __ lea(rsp, Address(rsp,  8));
      }
    }
  %}
1771
1772
  // Before a call: if this method runs in 24-bit FPU precision mode,
  // restore the standard control word for the callee.
  enc_class pre_call_FPU %{
    // If method sets FPU control word restore it here
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
    }
  %}

  // After the call: switch back to the 24-bit FPU control word.
  enc_class post_call_FPU %{
    // If method sets FPU control word do it here also
    if( Compile::current()->in_24_bit_fp_mode() ) {
      MacroAssembler masm(&cbuf);
      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
    }
  %}
1788
  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    // Relocation type depends on the call: runtime stub (no _method),
    // optimized virtual call, or plain static call.
    if ( !_method ) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     runtime_call_Relocation::spec(), RELOC_IMM32 );
    } else if(_optimized_virtual) {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
    } else {
      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                     static_call_Relocation::spec(), RELOC_IMM32 );
    }
    if( _method ) {  // Emit stub for static call
      emit_java_to_interp(cbuf);
    }
  %}

  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
    // !!!!!
    // Generate  "Mov EAX,0x00", placeholder instruction to load oop-info
    // emit_call_dynamic_prologue( cbuf );
    cbuf.set_inst_mark();
    emit_opcode(cbuf, 0xB8 + EAX_enc);        // mov    EAX,-1
    emit_d32_reloc(cbuf, (int)Universe::non_oop_word(), oop_Relocation::spec_for_immediate(), RELOC_IMM32);
    // Remember where the placeholder's immediate lives; the virtual-call
    // relocation below refers back to it.
    address  virtual_call_oop_addr = cbuf.inst_mark();
    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
    // who we intended to call.
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.code_end()) - 4),
                virtual_call_Relocation::spec(virtual_call_oop_addr), RELOC_IMM32 );
  %}
1824
  // Indirect call through a method's compiled-code entry point:
  // CALL [EAX + from_compiled_offset].  NOTE(review): presumably EAX holds
  // the methodOop here — see the matching instruct to confirm.
  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
    int disp = in_bytes(methodOopDesc::from_compiled_offset());
    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");

    // CALL *[EAX+in_bytes(methodOopDesc::from_compiled_code_entry_point_offset())]
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
    emit_d8(cbuf, disp);             // Displacement

  %}

  // XOR dst,dst -- zero a register.
  enc_class Xor_Reg (eRegI dst) %{
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
  %}
1841
1842//   Following encoding is no longer used, but may be restored if calling
1843//   convention changes significantly.
1844//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1845//
1846//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1847//     // int ic_reg     = Matcher::inline_cache_reg();
1848//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1849//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1850//     // int imo_encode = Matcher::_regEncode[imo_reg];
1851//
1852//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1853//     // // so we load it immediately before the call
1854//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1855//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1856//
1857//     // xor rbp,ebp
1858//     emit_opcode(cbuf, 0x33);
1859//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1860//
1861//     // CALL to interpreter.
1862//     cbuf.set_inst_mark();
1863//     $$$emit8$primary;
1864//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.code_end()) - 4),
1865//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1866//   %}
1867
  // Shift by an 8-bit immediate: primary opcode, ModRM carrying the
  // $secondary sub-opcode, then the shift-count byte.
  enc_class RegOpcImm (eRegI dst, immI8 shift) %{    // SHL, SAR, SHR
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    $$$emit8$shift$$constant;
  %}

  // MOV r32,imm32 via the dense 0xB8+rd encoding.
  enc_class LdImmI (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, 0xB8 + $dst$$reg);
    $$$emit32$src$$constant;
  %}

  // Same, but the dense base opcode comes from the instruct's $primary.
  enc_class LdImmP (eRegI dst, immI src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    emit_opcode(cbuf, $primary + $dst$$reg);
    $$$emit32$src$$constant;
  %}
1887
  // Load the LOW 32 bits of a long constant; a zero low word is loaded
  // with the shorter XOR dst,dst instead of a MOV imm32.
  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg;
    int src_con = $src$$constant & 0x0FFFFFFFFL;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}

  // Load the HIGH 32 bits of a long constant into the pair's high register.
  // NOTE(review): the high half is encoded here as dst+2, whereas other
  // encodings use HIGH_FROM_LOW() — presumably equivalent for the register
  // pairs allowed in eRegL; confirm against the register definitions.
  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
    // Load immediate does not have a zero or sign extended version
    // for 8-bit immediates
    int dst_enc = $dst$$reg + 2;
    int src_con = ((julong)($src$$constant)) >> 32;
    if (src_con == 0) {
      // xor dst, dst
      emit_opcode(cbuf, 0x33);
      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
    } else {
      emit_opcode(cbuf, $primary + dst_enc);
      emit_d32(cbuf, src_con);
    }
  %}
1917
1918
  // Push a double constant onto the x87 stack, using the FLDZ/FLD1
  // shortcuts for +0.0 and +1.0, else a load from an absolute address
  // emitted by emit_double_constant.
  enc_class LdImmD (immD src) %{    // Load Immediate
    if( is_positive_zero_double($src$$constant)) {
      // FLDZ
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_double($src$$constant)) {
      // FLD1
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      emit_opcode(cbuf,0xDD);
      emit_rm(cbuf, 0x0, 0x0, 0x5);   // mod=00, rm=101: disp32 absolute
      emit_double_constant(cbuf, $src$$constant);
    }
  %}


  // Float version of LdImmD; the general-case opcode comes from $primary.
  enc_class LdImmF (immF src) %{    // Load Immediate
    if( is_positive_zero_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xEE);
    } else if( is_positive_one_float($src$$constant)) {
      emit_opcode(cbuf,0xD9);
      emit_opcode(cbuf,0xE8);
    } else {
      $$$emit8$primary;
      // Load immediate does not have a zero or sign extended version
      // for 8-bit immediates
      // First load to TOS, then move to dst
      emit_rm(cbuf, 0x0, 0x0, 0x5);
      emit_float_constant(cbuf, $src$$constant);
    }
  %}
1952
  // Load a float constant into an XMM register; the opcode bytes are
  // emitted by the instruct, this supplies ModRM (mod=00, rm=101: disp32
  // absolute) plus the constant's address.
  enc_class LdImmX (regX dst, immXF con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_float_constant(cbuf, $con$$constant);
  %}

  // Double version of LdImmX.
  enc_class LdImmXD (regXD dst, immXD con) %{    // Load Immediate
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  enc_class load_conXD (regXD dst, immXD con) %{ // Load double constant
    // UseXmmLoadAndClearUpper ? movsd(dst, con) : movlpd(dst, con)
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode(cbuf, 0x0F);
    emit_opcode(cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);
    emit_double_constant(cbuf, $con$$constant);
  %}

  // Primary opcode + ModRM($secondary, disp32 absolute) + float constant.
  // The inst mark is set first so the memory operand can be relocated.
  enc_class Opc_MemImm_F(immF src) %{
    cbuf.set_inst_mark();
    $$$emit8$primary;
    emit_rm(cbuf, 0x0, $secondary, 0x5);
    emit_float_constant(cbuf, $src$$constant);
  %}
1978
1979
  // MOVD xmm,r32 (66 0F 6E /r): move a GPR into an XMM register.
  enc_class MovI2X_reg(regX dst, eRegI src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x6E );
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MOVD r32,xmm (66 0F 7E /r): note the ModRM fields are swapped
  // relative to MovI2X_reg — the XMM register goes in the reg field.
  enc_class MovX2I_reg(eRegI dst, regX src) %{
    emit_opcode(cbuf, 0x66 );     // MOVD dst,src
    emit_opcode(cbuf, 0x0F );
    emit_opcode(cbuf, 0x7E );
    emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
  %}

  // Assemble a 64-bit value in an XMM register from a GPR pair:
  // MOVD each half into an XMM register, then interleave with PUNPCKLDQ.
  enc_class MovL2XD_reg(regXD dst, eRegL src, regXD tmp) %{
    { // MOVD $dst,$src.lo
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    }
    { // MOVD $tmp,$src.hi
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x6E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
    }
    { // PUNPCKLDQ $dst,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x62);
      emit_rm(cbuf, 0x3, $dst$$reg, $tmp$$reg);
     }
  %}

  // Extract a 64-bit value from an XMM register into a GPR pair:
  // MOVD the low word out, shuffle the high word down, MOVD it out too.
  enc_class MovXD2L_reg(eRegL dst, regXD src, regXD tmp) %{
    { // MOVD $dst.lo,$src
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $src$$reg, $dst$$reg);
    }
    { // PSHUFLW $tmp,$src,0x4E  (01001110b)
      emit_opcode(cbuf,0xF2);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x70);
      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
      emit_d8(cbuf, 0x4E);
    }
    { // MOVD $dst.hi,$tmp
      emit_opcode(cbuf,0x66);
      emit_opcode(cbuf,0x0F);
      emit_opcode(cbuf,0x7E);
      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
    }
  %}
2036
2037
  // Encode a reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_Copy( eRegI dst, eRegI src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Copy the LOW half of a long pair into an int register.
  enc_class enc_CopyL_Lo( eRegI dst, eRegL src ) %{
    encode_Copy( cbuf, $dst$$reg, $src$$reg );
  %}

  // Encode xmm reg-reg copy.  If it is useless, then empty encoding.
  enc_class enc_CopyXD( RegXD dst, RegXD src ) %{
    encode_CopyXD( cbuf, $dst$$reg, $src$$reg );
  %}

  // NOTE(review): duplicate of the RegReg enc_class defined earlier in
  // this encode block.
  enc_class RegReg (eRegI dst, eRegI src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long op, low halves: $primary opcode + ModRM on the low registers.
  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // Long op, high halves: $secondary opcode + ModRM on the high registers.
  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
    $$$emit8$secondary;
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // Like RegReg_Lo/_Hi but without emitting an opcode byte here.
  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
  %}

  // ModRM pairing an int register with the HIGH half of a long pair.
  enc_class RegReg_HiLo( eRegL src, eRegI dst ) %{
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
  %}
2077
  enc_class Con32 (immI src) %{    // Con32(storeImmI)
    // Output immediate
    $$$emit32$src$$constant;
  %}

  // Emit a float's raw IEEE-754 bit pattern as a 32-bit immediate.
  enc_class Con32F_as_bits(immF src) %{        // storeF_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  // Same as Con32F_as_bits, for the XMM-flavored float immediate operand.
  enc_class Con32XF_as_bits(immXF src) %{      // storeX_imm
    // Output Float immediate bits
    jfloat jf = $src$$constant;
    int    jf_as_bits = jint_cast( jf );
    emit_d32(cbuf, jf_as_bits);
  %}

  enc_class Con16 (immI src) %{    // Con16(storeImmI)
    // Output immediate
    $$$emit16$src$$constant;
  %}

  enc_class Con_d32(immI src) %{
    emit_d32(cbuf,$src$$constant);
  %}

  // Absolute memory reference: ModRM(mod=00, rm=101) plus a 32-bit
  // address of 0 — presumably patched later via relocation; confirm
  // against the instructs that use it.
  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
    // Output immediate memory reference
    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
    emit_d32(cbuf, 0x00);
  %}

  // LOCK prefix (0xF0), emitted only on multiprocessor machines.
  enc_class lock_prefix( ) %{
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);         // [Lock]
  %}
2116
  // Cmp-xchg long value.
  // Note: we need to swap rbx and rcx before and after the
  //       cmpxchg8 instruction because the instruction uses
  //       rcx as the high order word of the new value to store but
  //       our register encoding uses rbx.
  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{

    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);
    // CMPXCHG8 [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xC7);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
    // XCHG  rbx,ecx
    emit_opcode(cbuf,0x87);
    emit_opcode(cbuf,0xD9);
  %}

  // Atomic compare-and-exchange of a 32-bit value at [mem_ptr].
  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
    // [Lock]
    if( os::is_MP() )
      emit_opcode(cbuf,0xF0);

    // CMPXCHG [Eptr]
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xB1);
    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
  %}
2149
  // Materialize the equal/not-equal flag as a boolean in 'res':
  // res = 0; if not-equal, skip over the 5-byte "MOV res,1".
  // MOV r32,imm32 is used (rather than XOR) because MOV leaves the
  // flags intact for the following JNE.
  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
    int res_encoding = $res$$reg;

    // MOV  res,0
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 0 );
    // JNE,s  fail
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 5 );
    // MOV  res,1
    emit_opcode( cbuf, 0xB8 + res_encoding);
    emit_d32( cbuf, 1 );
    // fail:
  %}

  enc_class set_instruction_start( ) %{
    cbuf.set_inst_mark();            // Mark start of opcode for reloc info in mem operand
  %}
2168
2169  enc_class RegMem (eRegI ereg, memory mem) %{    // emit_reg_mem
2170    int reg_encoding = $ereg$$reg;
2171    int base  = $mem$$base;
2172    int index = $mem$$index;
2173    int scale = $mem$$scale;
2174    int displace = $mem$$disp;
2175    bool disp_is_oop = $mem->disp_is_oop();
2176    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2177  %}
2178
  // Address the HIGH half of a long in memory: like RegMem, but using the
  // high register of the pair and a displacement bumped by 4 bytes.
  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
    assert( !$mem->disp_is_oop(), "Cannot add 4 to oop" );
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, false/*disp_is_oop*/);
  %}
2188
  // Long shift by a constant 1-31: a double shift (0F A4 = SHLD imm8 form)
  // moves bits between the two halves, then the plain $primary/$secondary
  // shift finishes the other half.  $tertiary selects the direction.
  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
    int r1, r2;
    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,$tertiary);
    emit_rm(cbuf, 0x3, r1, r2);
    emit_d8(cbuf,$cnt$$constant);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, r1);
    emit_d8(cbuf,$cnt$$constant);
  %}

  // Signed long right shift by a constant 32-63: move hi into lo, shift lo
  // by cnt-32, then shift the high half by 31 to fill it with sign bits
  // (the shift opcode comes from the instruct's $primary/$secondary).
  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
    emit_opcode( cbuf, 0x8B ); // Move
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
    emit_d8(cbuf,$cnt$$constant-32);
    emit_d8(cbuf,$primary);
    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
    emit_d8(cbuf,31);
  %}

  // Logical long shift by a constant 32-63: move one half across, shift it
  // by cnt-32 (skipped when cnt==32), and clear the vacated half with XOR.
  // $secondary selects which half moves in which direction.
  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
    int r1, r2;
    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }

    emit_opcode( cbuf, 0x8B ); // Move r1,r2
    emit_rm(cbuf, 0x3, r1, r2);
    if( $cnt$$constant > 32 ) { // Shift, if not by zero
      emit_opcode(cbuf,$primary);
      emit_rm(cbuf, 0x3, $secondary, r1);
      emit_d8(cbuf,$cnt$$constant-32);
    }
    emit_opcode(cbuf,0x33);  // XOR r2,r2
    emit_rm(cbuf, 0x3, r2, r2);
  %}
2228
2229  // Clone of RegMem but accepts an extra parameter to access each
2230  // half of a double in memory; it never needs relocation info.
2231  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, eRegI rm_reg) %{
2232    emit_opcode(cbuf,$opcode$$constant);
2233    int reg_encoding = $rm_reg$$reg;
2234    int base     = $mem$$base;
2235    int index    = $mem$$index;
2236    int scale    = $mem$$scale;
2237    int displace = $mem$$disp + $disp_for_half$$constant;
2238    bool disp_is_oop = false;
2239    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2240  %}
2241
2242  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2243  //
2244  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2245  // and it never needs relocation information.
2246  // Frequently used to move data between FPU's Stack Top and memory.
2247  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2248    int rm_byte_opcode = $rm_opcode$$constant;
2249    int base     = $mem$$base;
2250    int index    = $mem$$index;
2251    int scale    = $mem$$scale;
2252    int displace = $mem$$disp;
2253    assert( !$mem->disp_is_oop(), "No oops here because no relo info allowed" );
2254    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, false);
2255  %}
2256
2257  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2258    int rm_byte_opcode = $rm_opcode$$constant;
2259    int base     = $mem$$base;
2260    int index    = $mem$$index;
2261    int scale    = $mem$$scale;
2262    int displace = $mem$$disp;
2263    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
2264    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
2265  %}
2266
2267  enc_class RegLea (eRegI dst, eRegI src0, immI src1 ) %{    // emit_reg_lea
2268    int reg_encoding = $dst$$reg;
2269    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2270    int index        = 0x04;            // 0x04 indicates no index
2271    int scale        = 0x00;            // 0x00 indicates no scale
2272    int displace     = $src1$$constant; // 0x00 indicates no displacement
2273    bool disp_is_oop = false;
2274    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
2275  %}
2276
  // Branchy MIN: compare, then jump (JL, disp 2) over the 2-byte MOV when
  // dst is already the smaller value.  Clobbers the flags.
  enc_class min_enc (eRegI dst, eRegI src) %{    // MIN
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst < src around move
    emit_opcode(cbuf,0x7C);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}

  // MAX: same pattern with the branch condition reversed (JG, 0x7F).
  enc_class max_enc (eRegI dst, eRegI src) %{    // MAX
    // Compare dst,src
    emit_opcode(cbuf,0x3B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
    // jmp dst > src around move
    emit_opcode(cbuf,0x7F);
    emit_d8(cbuf,2);
    // move dst,src
    emit_opcode(cbuf,0x8B);
    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
  %}
2300
  // Store an x87 register to memory.  FPR1 can be stored directly (FST);
  // any other stack slot is first pushed to the top (FLD) and then stored
  // with a pop (FSTP), selected via the ModRM sub-opcode field.
  enc_class enc_FP_store(memory mem, regD src) %{
    // If src is FPR1, we can just FST to store it.
    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
    int reg_encoding = 0x2; // Just store
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
    if( $src$$reg != FPR1L_enc ) {
      reg_encoding = 0x3;  // Store & pop
      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
    }
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,$primary);
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
  %}
2319
  // Two's-complement negate: NEG r/m32 (0xF7 with sub-opcode /3).
  enc_class neg_reg(eRegI dst) %{
    // NEG $dst
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
  %}

  // SETcc (0F 9C = set-if-less) into the destination's low byte.
  enc_class setLT_reg(eCXRegI dst) %{
    // SETLT $dst
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0x9C);
    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
  %}
2332
  // Branch-free cadd_cmpLT: p -= q; SBB tmp,tmp leaves tmp = 0 or -1
  // depending on the borrow from the SUB; AND masks y with that; then
  // p += tmp, conditionally adding y without any branch.
  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    emit_opcode(cbuf,0x23);
    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}

  // Same as enc_cmpLTP, but with the mask operand taken from memory; the
  // inst mark is set before the AND in case its memory operand needs
  // relocation info.
  enc_class enc_cmpLTP_mem(eRegI p, eRegI q, memory mem, eCXRegI tmp) %{    // cadd_cmpLT
    int tmpReg = $tmp$$reg;

    // SUB $p,$q
    emit_opcode(cbuf,0x2B);
    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
    // SBB $tmp,$tmp
    emit_opcode(cbuf,0x1B);
    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
    // AND $tmp,$y
    cbuf.set_inst_mark();       // Mark start of opcode for reloc info in mem operand
    emit_opcode(cbuf,0x23);
    int reg_encoding = tmpReg;
    int base  = $mem$$base;
    int index = $mem$$index;
    int scale = $mem$$scale;
    int displace = $mem$$disp;
    bool disp_is_oop = $mem->disp_is_oop();
    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_is_oop);
    // ADD $p,$tmp
    emit_opcode(cbuf,0x03);
    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
  %}
2373
  // 64-bit left shift with the count in ECX (SHLD/SHL use CL).  If bit 5 of
  // the count is set (count >= 32) the low word is first moved into the high
  // word and the low word cleared; the 4-byte short JE skips exactly that
  // MOV (2 bytes) + XOR (2 bytes) pair.
  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.hi,$dst.lo
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // CLR    $dst.lo  (XOR dst,dst)
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
// small:
    // SHLD   $dst.hi,$dst.lo,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xA5);
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
    // SHL    $dst.lo,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
  %}
2397
  // 64-bit logical right shift with the count in ECX.  For counts >= 32 the
  // high word is moved down and cleared first; the 4-byte short JE skips the
  // MOV (2 bytes) + XOR (2 bytes) pair.
  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x04);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // CLR    $dst.hi  (XOR hi,hi)
    emit_opcode(cbuf, 0x33);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SHR    $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
  %}
2421
  // 64-bit arithmetic right shift with the count in ECX.  For counts >= 32
  // the high word is moved down and then sign-filled with SAR hi,31; the
  // 5-byte short JE skips the MOV (2 bytes) + SAR imm8 (3 bytes) pair.
  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
    // TEST shift,32
    emit_opcode(cbuf,0xF7);
    emit_rm(cbuf, 0x3, 0, ECX_enc);
    emit_d32(cbuf,0x20);
    // JEQ,s small
    emit_opcode(cbuf, 0x74);
    emit_d8(cbuf, 0x05);
    // MOV    $dst.lo,$dst.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
    // SAR    $dst.hi,31
    emit_opcode(cbuf, 0xC1);
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
    emit_d8(cbuf, 0x1F );
// small:
    // SHRD   $dst.lo,$dst.hi,$shift
    emit_opcode(cbuf,0x0F);
    emit_opcode(cbuf,0xAD);
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
    // SAR    $dst.hi,$shift"
    emit_opcode(cbuf,0xD3);
    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
  %}
2446
2447
2448  // ----------------- Encodings for floating point unit -----------------
2449  // May leave result in FPU-TOS or FPU reg depending on opcodes
  // Generic FPU reg-form instruction: opcode byte from $primary, the
  // /digit from $secondary, with src as the FPU stack register operand.
  enc_class OpcReg_F (regF src) %{    // FMUL, FDIV
    $$$emit8$primary;
    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
  %}
2454
2455  // Pop argument in FPR0 with FSTP ST(0)
  // Discard the FPU top-of-stack: FSTP ST(0) is DD D8.
  enc_class PopFPU() %{
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, 0xD8 );
  %}
2460
2461  // !!!!! equivalent to Pop_Reg_F
  // Store TOS into FPU register dst and pop: FSTP ST(i) is DD D8+i.
  enc_class Pop_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2466
  // Push FPU register dst onto the FPU stack: FLD ST(i-1) is D9 C0+(i-1).
  // The -1 accounts for dst's position shifting once the value is pushed.
  enc_class Push_Reg_D( regD dst ) %{
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
  %}
2471
  // strictfp scaling step 1: load an 80-bit bias constant from an absolute
  // address (FLD m80real, DB /5 with disp32) and FMULP it into dst.
  enc_class strictfp_bias1( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2479
  // strictfp scaling step 2: multiply dst by the inverse bias constant to
  // undo strictfp_bias1 (same FLD m80real + FMULP sequence).
  enc_class strictfp_bias2( regD dst ) %{
    emit_opcode( cbuf, 0xDB );           // FLD m80real
    emit_opcode( cbuf, 0x2D );
    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
    emit_opcode( cbuf, 0xC8+$dst$$reg );
  %}
2487
2488  // Special case for moving an integer register to a stack slot.
  // Store an integer register into a stack slot; opcode from $primary.
  enc_class OpcPRegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
  %}
2492
2493  // Special case for moving a register to a stack slot.
  // Emit only the [ESP+disp32] addressing bytes for a register-to-stack-slot
  // move; the opcode byte must already have been emitted by the caller.
  // An ESP base always requires a SIB byte.
  enc_class RegSS( stackSlotI dst, eRegI src ) %{ // RegSS
    // Opcode already emitted
    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
    emit_d32(cbuf, $dst$$disp);   // Displacement
  %}
2500
2501  // Push the integer in stackSlot 'src' onto FP-stack
  // FILD the integer at [ESP+src] onto the FPU stack; opcode/digit come
  // from the instruct's $primary/$secondary.
  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
  %}
2505
2506  // Push the float in stackSlot 'src' onto FP-stack
  // FLD_S [ESP+src]: push a 32-bit float from the stack slot (D9 /0).
  enc_class Push_Mem_F( memory src ) %{    // FLD_S   [ESP+src]
    store_to_stackslot( cbuf, 0xD9, 0x00, $src$$disp );
  %}
2510
2511  // Push the double in stackSlot 'src' onto FP-stack
  // FLD_D [ESP+src]: push a 64-bit double from the stack slot (DD /0).
  enc_class Push_Mem_D( memory src ) %{    // FLD_D   [ESP+src]
    store_to_stackslot( cbuf, 0xDD, 0x00, $src$$disp );
  %}
2515
2516  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
  // FSTP_S [ESP+dst]: store TOS as a 32-bit float and pop (D9 /3).
  enc_class Pop_Mem_F( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
  %}
2520
2521  // Same as Pop_Mem_F except for opcode
2522  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
  // FSTP_D [ESP+dst]: store TOS as a 64-bit double and pop (DD /3).
  enc_class Pop_Mem_D( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
  %}
2526
  // Store TOS into FPU register dst and pop: FSTP ST(i) is DD D8+i.
  // (Identical encoding to Pop_Reg_D; floats and doubles share FPU regs.)
  enc_class Pop_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
    emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2531
  // Push FPU register dst onto the FPU stack: FLD ST(i-1) is D9 C0+(i-1).
  enc_class Push_Reg_F( regF dst ) %{
    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
  %}
2536
2537  // Push FPU's float to a stack-slot, and pop FPU-stack
  // Store FPU register src to stack slot dst as a 32-bit float.  If src is
  // already FPR1 (TOS), a plain FST (/2) suffices; otherwise FLD it first
  // and use FSTP (/3) to pop the duplicate.
  enc_class Pop_Mem_Reg_F( stackSlotF dst, regF src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
  %}
2547
2548  // Push FPU's double to a stack-slot, and pop FPU-stack
  // Store FPU register src to stack slot dst as a 64-bit double.  Same
  // FST-vs-FLD+FSTP choice as Pop_Mem_Reg_F, with the DD (double) opcode.
  enc_class Pop_Mem_Reg_D( stackSlotD dst, regD src ) %{
    int pop = 0x02;
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0x03;
    }
    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
  %}
2558
2559  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
  // Copy FPU register src to FPU register dst.  If src is FPR1 no FLD is
  // emitted, so FST ST(i) (DD D0+i) is used with the -1 adjusting for the
  // missing push; otherwise FLD src then FSTP ST(i) (DD D8+i) to pop.
  enc_class Pop_Reg_Reg_D( regD dst, regF src ) %{
    int pop = 0xD0 - 1; // -1 since we skip FLD
    if ($src$$reg != FPR1L_enc) {
      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
      emit_d8( cbuf, 0xC0-1+$src$$reg );
      pop = 0xD8;
    }
    emit_opcode( cbuf, 0xDD );
    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
  %}
2570
2571
  // dst = src1 * src2 + src via the assembler: FLD src1 (making it TOS),
  // FMUL by src2, FADD src, then FSTP into dst (popping the temporary).
  enc_class Mul_Add_F( regF dst, regF src, regF src1, regF src2 ) %{
    MacroAssembler masm(&cbuf);
    masm.fld_s(  $src1$$reg-1);   // nothing at TOS, load TOS from src1.reg
    masm.fmul(   $src2$$reg+0);   // value at TOS
    masm.fadd(   $src$$reg+0);    // value at TOS
    masm.fstp_d( $dst$$reg+0);    // value at TOS, popped off after store
  %}
2579
2580
  // Load dst into FPR0 and arrange for src to sit in FPR1 (the operand
  // positions FPREM needs).  The FINCSTP / FXCH / FDECSTP trio exchanges
  // src with FPR1 without disturbing the value just pushed at TOS.
  enc_class Push_Reg_Mod_D( regD dst, regD src) %{
    // load dst in FPR0
    emit_opcode( cbuf, 0xD9 );
    emit_d8( cbuf, 0xC0-1+$dst$$reg );
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // swap src with FPR1:
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
  %}
2598
  // Move two XMM doubles onto the x87 stack (src1 first, then src0, so src0
  // ends up at TOS) by bouncing each through an 8-byte stack temp:
  // MOVSD [ESP],xmm then FLD_D [ESP].  The temp is freed by a later
  // pop_stack_temp_qword / Push_Result* encoding.
  enc_class Push_ModD_encoding( regXD src0, regXD src1) %{
    // Reserve an 8-byte temp on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2622
  // Float flavor of Push_ModD_encoding: move two XMM singles onto the x87
  // stack via a 4-byte stack temp (MOVSS [ESP] then FLD_S [ESP]); src0
  // ends up at TOS.
  enc_class Push_ModX_encoding( regX src0, regX src1) %{
    // Reserve a 4-byte temp on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,4
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x04);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src1
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src1$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

    emit_opcode  (cbuf, 0xF3 );     // MOVSS [ESP], src0
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src0$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xD9 );      // FLD [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);

  %}
2646
  // Move the x87 TOS double into XMM dst: FSTP to [ESP], reload with
  // MOVSD (F2 0F 10, clears upper half) or MOVLPD (66 0F 12) depending on
  // UseXmmLoadAndClearUpper, then release the 8-byte stack temp.
  enc_class Push_ResultXD(regXD dst) %{
    store_to_stackslot( cbuf, 0xDD, 0x03, 0 ); //FSTP [ESP]

    // UseXmmLoadAndClearUpper ? movsd dst,[esp] : movlpd dst,[esp]
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, UseXmmLoadAndClearUpper ? 0x10 : 0x12);
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,0x08);
  %}
2660
  // Move the x87 TOS float into XMM dst: FSTP_S to [ESP], MOVSS reload,
  // then pop the stack temp; d8 is the temp size to release (4 or 8).
  enc_class Push_ResultX(regX dst, immI d8) %{
    store_to_stackslot( cbuf, 0xD9, 0x03, 0 ); //FSTP_S [ESP]

    emit_opcode  (cbuf, 0xF3 );     // MOVSS dst(xmm), [ESP]
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x10 );
    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0x83);    // ADD ESP,d8 (4 or 8)
    emit_opcode(cbuf,0xC4);
    emit_d8(cbuf,$d8$$constant);
  %}
2673
  // Push an XMM double onto the x87 stack via an 8-byte stack temp
  // (MOVSD [ESP],src then FLD_D [ESP]).  Leaves ESP lowered by 8; the
  // caller's encoding sequence is responsible for releasing the temp.
  enc_class Push_SrcXD(regXD src) %{
    // Reserve an 8-byte temp on the stack
    emit_opcode(cbuf,0x83);            // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8(cbuf,0x08);

    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2688
  // Reserve an 8-byte scratch qword on the stack (SUB ESP,8).
  enc_class push_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // SUB ESP,8
    emit_opcode(cbuf,0xEC);
    emit_d8    (cbuf,0x08);
  %}
2694
  // Release the 8-byte scratch qword reserved by push_stack_temp_qword.
  enc_class pop_stack_temp_qword() %{
    emit_opcode(cbuf,0x83);     // ADD ESP,8
    emit_opcode(cbuf,0xC4);
    emit_d8    (cbuf,0x08);
  %}
2700
  // Copy an XMM double to the x87 TOS through [ESP] (MOVSD store + FLD_D).
  // NOTE(review): writes to [ESP] without adjusting ESP — presumably the
  // qword temp was already reserved (push_stack_temp_qword); confirm at
  // the use sites.
  enc_class push_xmm_to_fpr1( regXD xmm_src ) %{
    emit_opcode  (cbuf, 0xF2 );     // MOVSD [ESP], xmm_src
    emit_opcode  (cbuf, 0x0F );
    emit_opcode  (cbuf, 0x11 );
    encode_RegMem(cbuf, $xmm_src$$reg, ESP_enc, 0x4, 0, 0, false);

    emit_opcode(cbuf,0xDD );      // FLD_D [ESP]
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
  %}
2710
2711  // Compute X^Y using Intel's fast hardware instructions, if possible.
2712  // Otherwise return a NaN.
  // Compute 2^Q where FPR1 = Q = Y*lg2(X): split Q into int(Q)+frac(Q),
  // use F2XM1/FLD1/FADDP for 2^frac(Q), then build 2^int(Q) as a double by
  // writing its biased exponent directly into the high word of the stack
  // temp, and multiply the two.  Overflowing exponents (int(Q)+1023 outside
  // 11 bits, detected with the 0xFFFFF800 mask) stuff NaN bits instead.
  // Clobbers EAX, EBX, ECX and the comment annotations track the FPU stack.
  // NOTE(review): uses [ESP]/[ESP+4] as an 8-byte temp — presumably
  // reserved by a preceding push_stack_temp_qword; confirm at use sites.
  enc_class pow_exp_core_encoding %{
    // FPR1 holds Y*ln2(X).  Compute FPR1 = 2^(Y*ln2(X))
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xC0);  // fdup = fld st(0)          Q       Q
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xFC);  // frndint               int(Q)      Q
    emit_opcode(cbuf,0xDC); emit_opcode(cbuf,0xE9);  // fsub st(1) -= st(0);  int(Q) frac(Q)
    emit_opcode(cbuf,0xDB);                          // FISTP [ESP]           frac(Q)
    emit_opcode(cbuf,0x1C);
    emit_d8(cbuf,0x24);
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xF0);  // f2xm1                 2^frac(Q)-1
    emit_opcode(cbuf,0xD9); emit_opcode(cbuf,0xE8);  // fld1                  1 2^frac(Q)-1
    emit_opcode(cbuf,0xDE); emit_opcode(cbuf,0xC1);  // faddp                 2^frac(Q)
    emit_opcode(cbuf,0x8B);                          // mov rax,[esp+0]=int(Q)
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 0, false);
    emit_opcode(cbuf,0xC7);                          // mov rcx,0xFFFFF800 - overflow mask
    emit_rm(cbuf, 0x3, 0x0, ECX_enc);
    emit_d32(cbuf,0xFFFFF800);
    emit_opcode(cbuf,0x81);                          // add rax,1023 - the double exponent bias
    emit_rm(cbuf, 0x3, 0x0, EAX_enc);
    emit_d32(cbuf,1023);
    emit_opcode(cbuf,0x8B);                          // mov rbx,eax
    emit_rm(cbuf, 0x3, EBX_enc, EAX_enc);
    emit_opcode(cbuf,0xC1);                          // shl rax,20 - Slide to exponent position
    emit_rm(cbuf,0x3,0x4,EAX_enc);
    emit_d8(cbuf,20);
    emit_opcode(cbuf,0x85);                          // test rbx,ecx - check for overflow
    emit_rm(cbuf, 0x3, EBX_enc, ECX_enc);
    emit_opcode(cbuf,0x0F); emit_opcode(cbuf,0x45);  // CMOVne rax,ecx - overflow; stuff NAN into EAX
    emit_rm(cbuf, 0x3, EAX_enc, ECX_enc);
    emit_opcode(cbuf,0x89);                          // mov [esp+4],eax - Store as part of double word
    encode_RegMem(cbuf, EAX_enc, ESP_enc, 0x4, 0, 4, false);
    emit_opcode(cbuf,0xC7);                          // mov [esp+0],0   - [ESP] = (double)(1<<int(Q)) = 2^int(Q)
    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
    emit_d32(cbuf,0);
    emit_opcode(cbuf,0xDC);                          // fmul dword st(0),[esp+0]; FPR1 = 2^int(Q)*2^frac(Q) = 2^Q
    encode_RegMem(cbuf, 0x1, ESP_enc, 0x4, 0, 0, false);
  %}
2749
2750//   enc_class Pop_Reg_Mod_D( regD dst, regD src)
2751//   was replaced by Push_Result_Mod_D followed by Pop_Reg_X() or Pop_Mem_X()
2752
  // After a modulo sequence, rotate src into FPR1 so a following
  // Pop_Reg_* / Pop_Mem_* can store it.  Uses the FINCSTP / FXCH / FDECSTP
  // trick to exchange src with FPR1 without touching the current TOS.
  enc_class Push_Result_Mod_D( regD src) %{
    if ($src$$reg != FPR1L_enc) {
      // fincstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF7);
      // FXCH FPR1 with src
      emit_opcode(cbuf, 0xD9);
      emit_d8(cbuf, 0xC8-1+$src$$reg );
      // fdecstp
      emit_opcode (cbuf, 0xD9);
      emit_opcode (cbuf, 0xF6);
    }
    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
    // // FSTP   FPR$dst$$reg
    // emit_opcode( cbuf, 0xDD );
    // emit_d8( cbuf, 0xD8+$dst$$reg );
  %}
2770
  // Copy the FPU status word into EFLAGS (FNSTSW AX + SAHF) and emit a
  // JNP over the next 5 bytes; the parity flag signals an unordered
  // (NaN) compare, so only the NaN-fixup code that follows is executed.
  enc_class fnstsw_sahf_skip_parity() %{
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jnp  ::skip
    emit_opcode( cbuf, 0x7B );
    emit_opcode( cbuf, 0x05 );
  %}
2781
  // FPREM is a partial remainder: it must be re-run until the C2 status
  // bit clears.  Loop body = FPREM, WAIT, FNSTSW AX, SAHF, then JP back
  // to the top (near jump, rel32 = -12 = 0xFFFFFFF4).
  enc_class emitModD() %{
    // fprem must be iterative
    // :: loop
    // fprem
    emit_opcode( cbuf, 0xD9 );
    emit_opcode( cbuf, 0xF8 );
    // wait
    emit_opcode( cbuf, 0x9b );
    // fnstsw ax
    emit_opcode( cbuf, 0xDF );
    emit_opcode( cbuf, 0xE0 );
    // sahf
    emit_opcode( cbuf, 0x9E );
    // jp  ::loop
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0x8A );
    emit_opcode( cbuf, 0xF4 );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
    emit_opcode( cbuf, 0xFF );
  %}
2803
  // Transfer FPU compare results to integer flags: FNSTSW AX, then test
  // the C2 (unordered) bit; if set, force the LT outcome by setting AH's
  // carry bit before SAHF.  The 0x66 prefix makes TEST a 16-bit AX form.
  enc_class fpu_flags() %{
    // fnstsw_ax
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // test ax,0x0400
    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
    emit_opcode( cbuf, 0xA9 );
    emit_d16   ( cbuf, 0x0400 );
    // // // This sequence works, but stalls for 12-16 cycles on PPro
    // // test rax,0x0400
    // emit_opcode( cbuf, 0xA9 );
    // emit_d32   ( cbuf, 0x00000400 );
    //
    // jz exit (no unordered comparison)
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x02 );
    // mov ah,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // sahf
    emit_opcode( cbuf, 0x9E);
  %}
2826
  // Post-compare fixup for P6 (FCOMI-style compares set PF on NaN):
  // if parity is set, force the LT outcome via MOV AH,1 + SAHF.  The JNP
  // displacement (3) skips exactly the MOV (2 bytes) + SAHF (1 byte);
  // the trailing NOP gives the branch a non-branch landing pad.
  enc_class cmpF_P6_fixup() %{
    // Fixup the integer flags in case comparison involved a NaN
    //
    // JNP exit (no unordered comparison, P-flag is set by NaN)
    emit_opcode( cbuf, 0x7B );
    emit_d8    ( cbuf, 0x03 );
    // MOV AH,1 - treat as LT case (set carry flag)
    emit_opcode( cbuf, 0xB4 );
    emit_d8    ( cbuf, 0x01 );
    // SAHF
    emit_opcode( cbuf, 0x9E);
    // NOP     // target for branch to avoid branch to branch
    emit_opcode( cbuf, 0x90);
  %}
2841
2842//     fnstsw_ax();
2843//     sahf();
2844//     movl(dst, nan_result);
2845//     jcc(Assembler::parity, exit);
2846//     movl(dst, less_result);
2847//     jcc(Assembler::below, exit);
2848//     movl(dst, equal_result);
2849//     jcc(Assembler::equal, exit);
2850//     movl(dst, greater_result);
2851
2852// less_result     =  1;
2853// greater_result  = -1;
2854// equal_result    = 0;
2855// nan_result      = -1;
2856
  // Materialize a three-way float compare result in dst after an FPU
  // compare: SAHF moves the status bits into EFLAGS, then a chain of
  // MOV imm32 (5 bytes) / short Jcc (2 bytes) pairs selects the value.
  // Jump displacements 0x13, 0x0C and 0x05 each skip the remaining pairs.
  // NOTE(review): the immediates here are -1 for the 'below' case and +1
  // for the fall-through, which is reversed w.r.t. the result table in the
  // comment above (less=1, greater=-1) — presumably the users of this
  // encoding compare with swapped operands; confirm at the instruct sites.
  enc_class CmpF_Result(eRegI dst) %{
    // fnstsw_ax();
    emit_opcode( cbuf, 0xDF);
    emit_opcode( cbuf, 0xE0);
    // sahf
    emit_opcode( cbuf, 0x9E);
    // movl(dst, nan_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::parity, exit);
    emit_opcode( cbuf, 0x7A );
    emit_d8    ( cbuf, 0x13 );
    // movl(dst, less_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, -1 );
    // jcc(Assembler::below, exit);
    emit_opcode( cbuf, 0x72 );
    emit_d8    ( cbuf, 0x0C );
    // movl(dst, equal_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 0 );
    // jcc(Assembler::equal, exit);
    emit_opcode( cbuf, 0x74 );
    emit_d8    ( cbuf, 0x05 );
    // movl(dst, greater_result);
    emit_opcode( cbuf, 0xB8 + $dst$$reg);
    emit_d32( cbuf, 1 );
  %}
2885
2886
2887  // XMM version of CmpF_Result. Because the XMM compare
2888  // instructions set the EFLAGS directly. It becomes simpler than
2889  // the float version above.
  // XMM version of CmpF_Result. Because the XMM compare
  // instructions set the EFLAGS directly. It becomes simpler than
  // the float version above.
  // Expects dst pre-set to 0 by the caller; decrements it for NaN/below,
  // increments it for above, leaves 0 for equal.  Uses short branches.
  enc_class CmpX_Result(eRegI dst) %{
    MacroAssembler _masm(&cbuf);
    Label nan, inc, done;

    __ jccb(Assembler::parity, nan);
    __ jccb(Assembler::equal,  done);
    __ jccb(Assembler::above,  inc);
    __ bind(nan);
    __ decrement(as_Register($dst$$reg)); // NO L qqq
    __ jmpb(done);
    __ bind(inc);
    __ increment(as_Register($dst$$reg)); // NO L qqq
    __ bind(done);
  %}
2904
2905  // Compare the longs and set flags
2906  // BROKEN!  Do Not use as-is
  // Compare the longs and set flags
  // BROKEN!  Do Not use as-is
  // (Comparing hi words, then lo words only if the hi words were equal,
  // leaves CMP-of-unsigned-low flags standing in for a signed 64-bit
  // compare — the combined flags are not correct for ordered tests.)
  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
    // JNE,s  done
    emit_opcode(cbuf,0x75);
    emit_d8(cbuf, 2 );
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
// done:
  %}
2919
  // Sign-extend a 32-bit int into a long register pair: copy src into both
  // halves of dst, then SAR the high half by 31 to replicate the sign bit.
  enc_class convert_int_long( regL dst, eRegI src ) %{
    // mov $dst.lo,$src
    int dst_encoding = $dst$$reg;
    int src_encoding = $src$$reg;
    encode_Copy( cbuf, dst_encoding  , src_encoding );
    // mov $dst.hi,$src
    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
    // sar $dst.hi,31
    emit_opcode( cbuf, 0xC1 );
    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
    emit_d8(cbuf, 0x1F );
  %}
2932
  // Long -> double: push the 64-bit value (hi then lo, giving little-endian
  // layout at [ESP]), FILD m64int (DF /5), then pop the 8 bytes back off.
  enc_class convert_long_double( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
    // pop stack
    emit_opcode(cbuf, 0x83); // add  SP, #8
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 0x8);
  %}
2948
  // High-word multiply for magic-number division: IMUL EDX:EAX by src1
  // (F7 /5), then arithmetic-shift the high word (EDX) right by cnt-32.
  // When cnt is exactly 32 the shift is a no-op and is omitted.
  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
    // IMUL   EDX:EAX,$src1
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
    // SAR    EDX,$cnt-32
    int shift_count = ((int)$cnt$$constant) - 32;
    if (shift_count > 0) {
      emit_opcode(cbuf, 0xC1);
      emit_rm(cbuf, 0x3, 7, $dst$$reg );
      emit_d8(cbuf, shift_count);
    }
  %}
2961
2962  // this version doesn't have add sp, 8
  // Like convert_long_double but leaves the 8 pushed bytes on the stack
  // (no trailing ADD ESP,8); the caller's sequence releases them.
  enc_class convert_long_double2( eRegL src ) %{
    // push $src.hi
    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
    // push $src.lo
    emit_opcode(cbuf, 0x50+$src$$reg  );
    // fild 64-bits at [SP]
    emit_opcode(cbuf,0xdf);
    emit_d8(cbuf, 0x6C);
    emit_d8(cbuf, 0x24);
    emit_d8(cbuf, 0x00);
  %}
2974
  // Widening signed multiply: IMUL r/m32 (F7 /5) leaves the 64-bit
  // product in EDX:EAX (the implicit eADXRegL destination).
  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic idea: long = (long)int * (long)int
    // IMUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
  %}
2981
  // Widening unsigned multiply: MUL r/m32 (F7 /4) leaves the 64-bit
  // product in EDX:EAX.
  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
    // MUL EDX:EAX, src
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
  %}
2988
  // Full 64x64->64 multiply into EDX:EAX (dst is the EDX:EAX pair, so
  // HIGH_FROM_LOW($dst) is EDX and $dst is EAX):
  //   tmp = src.lo * x.hi + x.lo * src.hi      (the two cross terms)
  //   EDX:EAX = MUL(x.lo, src.lo); EDX += tmp  (cross terms go in the high word)
  enc_class long_multiply( eADXRegL dst, eRegL src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(x_lo * y_lo)
    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
    // MOV    $tmp,$src.lo
    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
    // IMUL   $tmp,EDX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MOV    EDX,$src.hi
    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
    // IMUL   EDX,EAX
    emit_opcode( cbuf, 0x0F );
    emit_opcode( cbuf, 0xAF );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
    // ADD    $tmp,EDX
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    // MUL   EDX:EAX,$src.lo
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
    // ADD    EDX,$tmp   (fold the cross terms into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
  %}
3014
  // 64-bit multiply by a small (8-bit) constant into EDX:EAX:
  //   tmp = x.hi * src   (IMUL r32, r/m32, imm8 — opcode 6B /r ib)
  //   EDX:EAX = MUL(x.lo, src); EDX += tmp
  enc_class long_multiply_con( eADXRegL dst, immL_127 src, eRegI tmp ) %{
    // Basic idea: lo(result) = lo(src * y_lo)
    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
    // IMUL   $tmp,EDX,$src
    emit_opcode( cbuf, 0x6B );
    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
    emit_d8( cbuf, (int)$src$$constant );
    // MOV    EDX,$src
    emit_opcode(cbuf, 0xB8 + EDX_enc);
    emit_d32( cbuf, (int)$src$$constant );
    // MUL   EDX:EAX,EDX
    emit_opcode( cbuf, 0xF7 );
    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
    // ADD    EDX,$tmp   (fold the cross term into the high word)
    emit_opcode( cbuf, 0x03 );
    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
  %}
3032
  // 64-bit signed divide via the runtime: push both longs (hi then lo each,
  // 16 bytes total), call SharedRuntime::ldiv, then pop the arguments.
  // NOTE(review): HIGH_FROM_LOW is applied to (0x50+reg) here rather than
  // to the register alone (cf. convert_long_double) — equivalent only if
  // the macro is a pure additive offset; confirm against its definition.
  enc_class long_div( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3051
  // 64-bit signed remainder via the runtime: identical calling sequence to
  // long_div but targeting SharedRuntime::lrem.
  // NOTE(review): same HIGH_FROM_LOW(0x50+reg) pattern as long_div —
  // relies on the macro being a pure additive offset; confirm.
  enc_class long_mod( eRegL src1, eRegL src2 ) %{
    // PUSH src1.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
    // PUSH src1.lo
    emit_opcode(cbuf,               0x50+$src1$$reg  );
    // PUSH src2.hi
    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
    // PUSH src2.lo
    emit_opcode(cbuf,               0x50+$src2$$reg  );
    // CALL directly to the runtime
    cbuf.set_inst_mark();
    emit_opcode(cbuf,0xE8);       // Call into runtime
    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
    // Restore stack
    emit_opcode(cbuf, 0x83); // add  SP, #framesize
    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
    emit_d8(cbuf, 4*4);
  %}
3070
  // Test a long against zero: OR the two halves together in tmp so the
  // zero flag reflects the whole 64-bit value.
  enc_class long_cmp_flags0( eRegL src, eRegI tmp ) %{
    // MOV   $tmp,$src.lo
    emit_opcode(cbuf, 0x8B);
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
    // OR    $tmp,$src.hi
    emit_opcode(cbuf, 0x0B);
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
  %}
3079
  // Long equality compare: CMP the low words; if they differ (short JNE
  // over the 2-byte second CMP) the flags already say not-equal, otherwise
  // CMP the high words decides.  Only EQ/NE is meaningful afterwards.
  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
    // CMP    $src1.lo,$src2.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // JNE,s  skip
    emit_cc(cbuf, 0x70, 0x5);
    emit_d8(cbuf,2);
    // CMP    $src1.hi,$src2.hi
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
  %}
3091
  // Ordered long compare via subtract-with-borrow: CMP the low words, then
  // SBB src2.hi from a copy of src1.hi so the final flags reflect the full
  // 64-bit signed comparison.  Clobbers tmp.
  enc_class long_cmp_flags2( eRegL src1, eRegL src2, eRegI tmp ) %{
    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
    // MOV    $tmp,$src1.hi
    emit_opcode( cbuf, 0x8B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
  %}
3103
  // Compare zero against a long (0 - src) with the same CMP/SBB technique
  // as long_cmp_flags2, using a zeroed tmp as the left operand.
  enc_class long_cmp_flags3( eRegL src, eRegI tmp ) %{
    // XOR    $tmp,$tmp
    emit_opcode(cbuf,0x33);  // XOR
    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
    // CMP    $tmp,$src.lo
    emit_opcode( cbuf, 0x3B );
    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
    // SBB    $tmp,$src.hi
    emit_opcode( cbuf, 0x1B );
    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
  %}
3115
3116 // Sniff, sniff... smells like Gnu Superoptimizer
 // Sniff, sniff... smells like Gnu Superoptimizer
  // 64-bit negate without a spare register: NEG hi; NEG lo; then
  // SBB hi,0 borrows 1 from the high word exactly when lo was non-zero.
  enc_class neg_long( eRegL dst ) %{
    emit_opcode(cbuf,0xF7);    // NEG hi
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_opcode(cbuf,0xF7);    // NEG lo
    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
    emit_opcode(cbuf,0x83);    // SBB hi,0
    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
    emit_d8    (cbuf,0 );
  %}
3126
  // Load 64 bits from memory into an XMM register via the assembler.
  enc_class movq_ld(regXD dst, memory mem) %{
    MacroAssembler _masm(&cbuf);
    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
    __ movq(as_XMMRegister($dst$$reg), madr);
  %}
3132
  // Store the low 64 bits of an XMM register to memory via the assembler.
  enc_class movq_st(memory mem, regXD src) %{
    MacroAssembler _masm(&cbuf);
    Address madr = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp);
    __ movq(madr, as_XMMRegister($src$$reg));
  %}
3138
  // Replicate bytes: copy src to dst, widen the low bytes with PUNPCKLBW
  // (dst interleaved with itself), then PSHUFLW with imm 0x00 broadcasts
  // the low word across the low quadword.
  enc_class pshufd_8x8(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    encode_CopyXD(cbuf, $dst$$reg, $src$$reg);
    __ punpcklbw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg));
    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($dst$$reg), 0x00);
  %}
3146
  // Broadcast the low 16-bit element of src across dst's low quadword
  // (PSHUFLW with shuffle immediate 0x00).
  enc_class pshufd_4x16(regX dst, regX src) %{
    MacroAssembler _masm(&cbuf);

    __ pshuflw(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), 0x00);
  %}
3152
  // Shuffle 32-bit elements of src into dst with the given immediate mode.
  enc_class pshufd(regXD dst, regXD src, int mode) %{
    MacroAssembler _masm(&cbuf);

    __ pshufd(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg), $mode);
  %}
3158
  // Bitwise XOR of two XMM registers (dst ^= src).
  enc_class pxor(regXD dst, regXD src) %{
    MacroAssembler _masm(&cbuf);

    __ pxor(as_XMMRegister($dst$$reg), as_XMMRegister($src$$reg));
  %}
3164
  // Move a 32-bit GPR into the low dword of an XMM register (MOVD).
  enc_class mov_i2x(regXD dst, eRegI src) %{
    MacroAssembler _masm(&cbuf);

    __ movdl(as_XMMRegister($dst$$reg), as_Register($src$$reg));
  %}
3170
3171
3172  // Because the transitions from emitted code to the runtime
3173  // monitorenter/exit helper stubs are so slow it's critical that
3174  // we inline both the stack-locking fast-path and the inflated fast path.
3175  //
3176  // See also: cmpFastLock and cmpFastUnlock.
3177  //
3178  // What follows is a specialized inline transliteration of the code
3179  // in slow_enter() and slow_exit().  If we're concerned about I$ bloat
3180  // another option would be to emit TrySlowEnter and TrySlowExit methods
3181  // at startup-time.  These methods would accept arguments as
3182  // (rax,=Obj, rbx=Self, rcx=box, rdx=Scratch) and return success-failure
3183  // indications in the icc.ZFlag.  Fast_Lock and Fast_Unlock would simply
3184  // marshal the arguments and emit calls to TrySlowEnter and TrySlowExit.
3185  // In practice, however, the # of lock sites is bounded and is usually small.
3186  // Besides the call overhead, TrySlowEnter and TrySlowExit might suffer
3187  // if the processor uses simple bimodal branch predictors keyed by EIP
3188  // Since the helper routines would be called from multiple synchronization
3189  // sites.
3190  //
3191  // An even better approach would be write "MonitorEnter()" and "MonitorExit()"
3192  // in java - using j.u.c and unsafe - and just bind the lock and unlock sites
3193  // to those specialized methods.  That'd give us a mostly platform-independent
3194  // implementation that the JITs could optimize and inline at their pleasure.
  // Done correctly, the only time we'd need to cross to native code would be
3196  // to park() or unpark() threads.  We'd also need a few more unsafe operators
3197  // to (a) prevent compiler-JIT reordering of non-volatile accesses, and
3198  // (b) explicit barriers or fence operations.
3199  //
3200  // TODO:
3201  //
3202  // *  Arrange for C2 to pass "Self" into Fast_Lock and Fast_Unlock in one of the registers (scr).
3203  //    This avoids manifesting the Self pointer in the Fast_Lock and Fast_Unlock terminals.
3204  //    Given TLAB allocation, Self is usually manifested in a register, so passing it into
3205  //    the lock operators would typically be faster than reifying Self.
3206  //
3207  // *  Ideally I'd define the primitives as:
3208  //       fast_lock   (nax Obj, nax box, EAX tmp, nax scr) where box, tmp and scr are KILLED.
3209  //       fast_unlock (nax Obj, EAX box, nax tmp) where box and tmp are KILLED
3210  //    Unfortunately ADLC bugs prevent us from expressing the ideal form.
  //    Instead, we're stuck with the rather awkward and brittle register assignments below.
3212  //    Furthermore the register assignments are overconstrained, possibly resulting in
3213  //    sub-optimal code near the synchronization site.
3214  //
3215  // *  Eliminate the sp-proximity tests and just use "== Self" tests instead.
3216  //    Alternately, use a better sp-proximity test.
3217  //
3218  // *  Currently ObjectMonitor._Owner can hold either an sp value or a (THREAD *) value.
3219  //    Either one is sufficient to uniquely identify a thread.
3220  //    TODO: eliminate use of sp in _owner and use get_thread(tr) instead.
3221  //
3222  // *  Intrinsify notify() and notifyAll() for the common cases where the
3223  //    object is locked by the calling thread but the waitlist is empty.
  //    This avoids the expensive JNI calls to JVM_Notify() and JVM_NotifyAll().
3225  //
3226  // *  use jccb and jmpb instead of jcc and jmp to improve code density.
3227  //    But beware of excessive branch density on AMD Opterons.
3228  //
3229  // *  Both Fast_Lock and Fast_Unlock set the ICC.ZF to indicate success
3230  //    or failure of the fast-path.  If the fast-path fails then we pass
3231  //    control to the slow-path, typically in C.  In Fast_Lock and
3232  //    Fast_Unlock we often branch to DONE_LABEL, just to find that C2
3233  //    will emit a conditional branch immediately after the node.
3234  //    So we have branches to branches and lots of ICC.ZF games.
3235  //    Instead, it might be better to have C2 pass a "FailureLabel"
3236  //    into Fast_Lock and Fast_Unlock.  In the case of success, control
3237  //    will drop through the node.  ICC.ZF is undefined at exit.
3238  //    In the case of failure, the node will branch directly to the
3239  //    FailureLabel
3240
3241
3242  // obj: object to lock
3243  // box: on-stack box address (displaced header location) - KILLED
3244  // rax,: tmp -- KILLED
3245  // scr: tmp -- KILLED
  enc_class Fast_Lock( eRegP obj, eRegP box, eAXRegI tmp, eRegP scr ) %{
    // Emit the inlined monitor-enter fast path.
    //   obj - object being locked
    //   box - on-stack BasicLock box (displaced header location) - KILLED
    //   tmp - scratch; must be EAX, the implicit cmpxchg comparand - KILLED
    //   scr - scratch - KILLED
    // On exit ICC.ZF == 1 indicates success; ICC.ZF == 0 sends the caller
    // through the slow path (see the protocol commentary above).
    // EmitSync is a diagnostic bit-mask selecting alternate emission
    // strategies; the default (0) takes the final "else" arm below.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);
    Register scrReg = as_Register($scr$$reg);

    // Ensure the register assignments are disjoint
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (objReg != scrReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg != scrReg, "") ;
    guarantee (tmpReg == as_Register(EAX_enc), "") ;

    MacroAssembler masm(&cbuf);

    if (_counters != NULL) {
      masm.atomic_incl(ExternalAddress((address) _counters->total_entry_count_addr()));
    }
    if (EmitSync & 1) {
        // set box->dhw = unused_mark (3)
        // Force all sync thru slow-path: slow_enter() and slow_exit()
        masm.movptr (Address(boxReg, 0), int32_t(markOopDesc::unused_mark())) ;
        // rsp is never zero at runtime, so comparing it against 0 leaves
        // ZF == 0 and unconditionally routes control to the slow path.
        masm.cmpptr (rsp, (int32_t)0) ;
    } else
    if (EmitSync & 2) {
        // Stack-locking fast path only; inflated locks go to the slow path.
        Label DONE_LABEL ;
        if (UseBiasedLocking) {
           // Note: tmpReg maps to the swap_reg argument and scrReg to the tmp_reg argument.
           masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
        }

        masm.movptr(tmpReg, Address(objReg, 0)) ;          // fetch markword
        masm.orptr (tmpReg, 0x1);                          // set the "unlocked" mark bit
        masm.movptr(Address(boxReg, 0), tmpReg);           // Anticipate successful CAS
        if (os::is_MP()) { masm.lock();  }
        masm.cmpxchgptr(boxReg, Address(objReg, 0));          // Updates tmpReg
        masm.jcc(Assembler::equal, DONE_LABEL);
        // Recursive locking.
        // sp-proximity test (see the TODO list above): subtract rsp from the
        // fetched markword and mask; a zero result indicates the mark is a
        // stack address near rsp - our own existing stack-lock - and the
        // zero stored into the box flags the recursive case.
        masm.subptr(tmpReg, rsp);
        masm.andptr(tmpReg, (int32_t) 0xFFFFF003 );
        masm.movptr(Address(boxReg, 0), tmpReg);
        masm.bind(DONE_LABEL) ;
    } else {
      // Possible cases that we'll encounter in fast_lock
      // ------------------------------------------------
      // * Inflated
      //    -- unlocked
      //    -- Locked
      //       = by self
      //       = by other
      // * biased
      //    -- by Self
      //    -- by other
      // * neutral
      // * stack-locked
      //    -- by self
      //       = sp-proximity test hits
      //       = sp-proximity test generates false-negative
      //    -- by other
      //

      // NOTE(review): PopDone appears to be declared but never bound or
      // referenced in this arm.
      Label IsInflated, DONE_LABEL, PopDone ;

      // TODO: optimize away redundant LDs of obj->mark and improve the markword triage
      // order to reduce the number of conditional branches in the most common cases.
      // Beware -- there's a subtle invariant that fetch of the markword
      // at [FETCH], below, will never observe a biased encoding (*101b).
      // If this invariant is not held we risk exclusion (safety) failure.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
        masm.biased_locking_enter(boxReg, objReg, tmpReg, scrReg, false, DONE_LABEL, NULL, _counters);
      }

      masm.movptr(tmpReg, Address(objReg, 0)) ;         // [FETCH]
      masm.testptr(tmpReg, 0x02) ;                      // Inflated v (Stack-locked or neutral)
      masm.jccb  (Assembler::notZero, IsInflated) ;

      // Attempt stack-locking ...
      masm.orptr (tmpReg, 0x1);
      masm.movptr(Address(boxReg, 0), tmpReg);          // Anticipate successful CAS
      if (os::is_MP()) { masm.lock();  }
      masm.cmpxchgptr(boxReg, Address(objReg, 0));           // Updates tmpReg
      if (_counters != NULL) {
        // NOTE(review): this relies on cond_inc32 preserving the flags set
        // by the CAS so the jccb below still sees them - confirm against
        // MacroAssembler::cond_inc32.
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jccb (Assembler::equal, DONE_LABEL);

      // Recursive locking: the same sp-proximity test as in the
      // (EmitSync & 2) arm; zero in the box marks a recursive stack-lock.
      masm.subptr(tmpReg, rsp);
      masm.andptr(tmpReg, 0xFFFFF003 );
      masm.movptr(Address(boxReg, 0), tmpReg);
      if (_counters != NULL) {
        masm.cond_inc32(Assembler::equal,
                        ExternalAddress((address)_counters->fast_path_entry_count_addr()));
      }
      masm.jmp  (DONE_LABEL) ;

      masm.bind (IsInflated) ;

      // The object is inflated.
      //
      // TODO-FIXME: eliminate the ugly use of manifest constants:
      //   Use markOopDesc::monitor_value instead of "2".
      //   use markOop::unused_mark() instead of "3".
      // The tmpReg value is an objectMonitor reference ORed with
      // markOopDesc::monitor_value (2).   We can either convert tmpReg to an
      // objectmonitor pointer by masking off the "2" bit or we can just
      // use tmpReg as an objectmonitor pointer but bias the objectmonitor
      // field offsets with "-2" to compensate for and annul the low-order tag bit.
      //
      // I use the latter as it avoids AGI stalls.
      // As such, we write "mov r, [tmpReg+OFFSETOF(Owner)-2]"
      // instead of "mov r, [tmpReg+OFFSETOF(Owner)]".
      //
      // NOTE(review): OFFSET_SKEWED is not referenced below.
      #define OFFSET_SKEWED(f) ((ObjectMonitor::f ## _offset_in_bytes())-2)

      // boxReg refers to the on-stack BasicLock in the current frame.
      // We'd like to write:
      //   set box->_displaced_header = markOop::unused_mark().  Any non-0 value suffices.
      // This is convenient but results a ST-before-CAS penalty.  The following CAS suffers
      // additional latency as we have another ST in the store buffer that must drain.

      if (EmitSync & 8192) {
         masm.movptr(Address(boxReg, 0), 3) ;            // results in ST-before-CAS penalty
         masm.get_thread (scrReg) ;
         masm.movptr(boxReg, tmpReg);                    // consider: LEA box, [tmp-2]
         masm.movptr(tmpReg, NULL_WORD);                 // consider: xor vs mov
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
      } else
      if ((EmitSync & 128) == 0) {                      // avoid ST-before-CAS
         masm.movptr(scrReg, boxReg) ;
         masm.movptr(boxReg, tmpReg);                   // consider: LEA box, [tmp-2]

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form: consider XORL tmpReg,tmpReg
           masm.movptr(tmpReg, NULL_WORD) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           // Test-And-CAS instead of CAS
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Ideally, I'd manifest "Self" with get_thread and then attempt
         // to CAS the register containing Self into m->Owner.
         // But we don't have enough registers, so instead we can either try to CAS
         // rsp or the address of the box (in scr) into &m->owner.  If the CAS succeeds
         // we later store "Self" into m->Owner.  Transiently storing a stack address
         // (rsp or the address of the box) into  m->owner is harmless.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         if (os::is_MP()) { masm.lock();  }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         // MOV does not modify EFLAGS, so the ZF produced by the CAS above
         // is still live at the jccb that follows the store.
         masm.movptr(Address(scrReg, 0), 3) ;          // box->_displaced_header = 3
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.get_thread (scrReg) ;                    // beware: clobbers ICCs
         masm.movptr(Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2), scrReg) ;
         masm.xorptr(boxReg, boxReg) ;                 // set icc.ZFlag = 1 to indicate success

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      } else {
         masm.movptr(Address(boxReg, 0), 3) ;       // results in ST-before-CAS penalty
         masm.movptr(boxReg, tmpReg) ;

         // Using a prefetchw helps avoid later RTS->RTO upgrades and cache probes
         if ((EmitSync & 2048) && VM_Version::supports_3dnow() && os::is_MP()) {
            // prefetchw [eax + Offset(_owner)-2]
            masm.prefetchw(Address(rax, ObjectMonitor::owner_offset_in_bytes()-2));
         }

         if ((EmitSync & 64) == 0) {
           // Optimistic form
           masm.xorptr  (tmpReg, tmpReg) ;
         } else {
           // Can suffer RTS->RTO upgrades on shared or cold $ lines
           masm.movptr(tmpReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;   // rax, = m->_owner
           masm.testptr(tmpReg, tmpReg) ;                   // Locked ?
           masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         }

         // Appears unlocked - try to swing _owner from null to non-null.
         // Use either "Self" (in scr) or rsp as thread identity in _owner.
         // Invariant: tmpReg == 0.  tmpReg is EAX which is the implicit cmpxchg comparand.
         masm.get_thread (scrReg) ;
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(scrReg, Address(boxReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;

         // If the CAS fails we can either retry or pass control to the slow-path.
         // We use the latter tactic.
         // Pass the CAS result in the icc.ZFlag into DONE_LABEL
         // If the CAS was successful ...
         //   Self has acquired the lock
         //   Invariant: m->_recursions should already be 0, so we don't need to explicitly set it.
         // Intentional fall-through into DONE_LABEL ...
      }

      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      masm.bind(DONE_LABEL);

      // Avoid branch-to-branch on AMD processors
      // This appears to be superstition.
      if (EmitSync & 32) masm.nop() ;


      // At DONE_LABEL the icc ZFlag is set as follows ...
      // Fast_Unlock uses the same protocol.
      // ZFlag == 1 -> Success
      // ZFlag == 0 -> Failure - force control through the slow-path
    }
  %}
3476
3477  // obj: object to unlock
3478  // box: box address (displaced header location), killed.  Must be EAX.
3479  // rbx,: killed tmp; cannot be obj nor box.
3480  //
3481  // Some commentary on balanced locking:
3482  //
3483  // Fast_Lock and Fast_Unlock are emitted only for provably balanced lock sites.
3484  // Methods that don't have provably balanced locking are forced to run in the
3485  // interpreter - such methods won't be compiled to use fast_lock and fast_unlock.
3486  // The interpreter provides two properties:
3487  // I1:  At return-time the interpreter automatically and quietly unlocks any
  //      objects acquired by the current activation (frame).  Recall that the
3489  //      interpreter maintains an on-stack list of locks currently held by
3490  //      a frame.
  // I2:  If a method attempts to unlock an object that is not held by
  //      the frame the interpreter throws IMSX.
3493  //
3494  // Lets say A(), which has provably balanced locking, acquires O and then calls B().
3495  // B() doesn't have provably balanced locking so it runs in the interpreter.
3496  // Control returns to A() and A() unlocks O.  By I1 and I2, above, we know that O
3497  // is still locked by A().
3498  //
3499  // The only other source of unbalanced locking would be JNI.  The "Java Native Interface:
3500  // Programmer's Guide and Specification" claims that an object locked by jni_monitorenter
3501  // should not be unlocked by "normal" java-level locking and vice-versa.  The specification
3502  // doesn't specify what will occur if a program engages in such mixed-mode locking, however.
3503
  enc_class Fast_Unlock( nabxRegP obj, eAXRegP box, eRegP tmp) %{
    // Emit the inlined monitor-exit fast path.
    //   obj - object being unlocked (nabxRegP: may not be EAX or EBX)
    //   box - on-stack BasicLock box address; must be EAX, the implicit
    //         cmpxchg comparand - KILLED
    //   tmp - scratch; cannot be obj nor box - KILLED
    // On exit ICC.ZF == 1 indicates success; ICC.ZF == 0 sends the caller
    // through the slow path.  Same ZF protocol as Fast_Lock.
    // EmitSync is a diagnostic bit-mask selecting alternate strategies.

    Register objReg = as_Register($obj$$reg);
    Register boxReg = as_Register($box$$reg);
    Register tmpReg = as_Register($tmp$$reg);

    // Ensure the register assignments are disjoint and box is EAX.
    guarantee (objReg != boxReg, "") ;
    guarantee (objReg != tmpReg, "") ;
    guarantee (boxReg != tmpReg, "") ;
    guarantee (boxReg == as_Register(EAX_enc), "") ;
    MacroAssembler masm(&cbuf);

    if (EmitSync & 4) {
      // Disable - inhibit all inlining.  Force control through the slow-path.
      // rsp is never zero at runtime, so ZF == 0 and the slow path is
      // always taken.
      masm.cmpptr (rsp, 0) ;
    } else
    if (EmitSync & 8) {
      // Stack-locking-only exit path; inflated locks go to the slow path.
      Label DONE_LABEL ;
      if (UseBiasedLocking) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }
      // classic stack-locking code ...
      masm.movptr(tmpReg, Address(boxReg, 0)) ;
      masm.testptr(tmpReg, tmpReg) ;
      // A zero displaced header marks a recursive stack-lock; ZF == 1
      // already signals success in that case.
      masm.jcc   (Assembler::zero, DONE_LABEL) ;
      if (os::is_MP()) { masm.lock(); }
      masm.cmpxchgptr(tmpReg, Address(objReg, 0));          // Uses EAX which is box
      masm.bind(DONE_LABEL);
    } else {
      Label DONE_LABEL, Stacked, CheckSucc, Inflated ;

      // Critically, the biased locking test must have precedence over
      // and appear before the (box->dhw == 0) recursive stack-lock test.
      if (UseBiasedLocking && !UseOptoBiasInlining) {
         masm.biased_locking_exit(objReg, tmpReg, DONE_LABEL);
      }

      // Note: the movptr between the cmpptr and the jccb does not modify
      // EFLAGS, so the jccb tests the result of the cmpptr.
      masm.cmpptr(Address(boxReg, 0), 0) ;            // Examine the displaced header
      masm.movptr(tmpReg, Address(objReg, 0)) ;       // Examine the object's markword
      masm.jccb  (Assembler::zero, DONE_LABEL) ;      // 0 indicates recursive stack-lock

      masm.testptr(tmpReg, 0x02) ;                     // Inflated?
      masm.jccb  (Assembler::zero, Stacked) ;

      masm.bind  (Inflated) ;
      // It's inflated.
      // Despite our balanced locking property we still check that m->_owner == Self
      // as java routines or native JNI code called by this thread might
      // have released the lock.
      // Refer to the comments in synchronizer.cpp for how we might encode extra
      // state in _succ so we can avoid fetching EntryList|cxq.
      //
      // I'd like to add more cases in fast_lock() and fast_unlock() --
      // such as recursive enter and exit -- but we have to be wary of
      // I$ bloat, T$ effects and BP$ effects.
      //
      // If there's no contention try a 1-0 exit.  That is, exit without
      // a costly MEMBAR or CAS.  See synchronizer.cpp for details on how
      // we detect and recover from the race that the 1-0 exit admits.
      //
      // Conceptually Fast_Unlock() must execute a STST|LDST "release" barrier
      // before it STs null into _owner, releasing the lock.  Updates
      // to data protected by the critical section must be visible before
      // we drop the lock (and thus before any other thread could acquire
      // the lock and observe the fields protected by the lock).
      // IA32's memory-model is SPO, so STs are ordered with respect to
      // each other and there's no need for an explicit barrier (fence).
      // See also http://gee.cs.oswego.edu/dl/jmm/cookbook.html.

      masm.get_thread (boxReg) ;
      if ((EmitSync & 4096) && VM_Version::supports_3dnow() && os::is_MP()) {
        // prefetchw [ebx + Offset(_owner)-2]
        masm.prefetchw(Address(rbx, ObjectMonitor::owner_offset_in_bytes()-2));
      }

      // Note that we could employ various encoding schemes to reduce
      // the number of loads below (currently 4) to just 2 or 3.
      // Refer to the comments in synchronizer.cpp.
      // In practice the chain of fetches doesn't seem to impact performance, however.
      if ((EmitSync & 65536) == 0 && (EmitSync & 256)) {
         // Attempt to reduce branch density - AMD's branch predictor.
         // boxReg holds Self; XOR with _owner yields 0 iff we own the
         // monitor, then OR in recursions|EntryList|cxq: a zero result
         // means "owned by us, not recursive, no waiters" - safe 1-0 exit.
         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         masm.jmpb  (DONE_LABEL) ;
      } else {
         masm.xorptr(boxReg, Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::recursions_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, DONE_LABEL) ;
         masm.movptr(boxReg, Address (tmpReg, ObjectMonitor::EntryList_offset_in_bytes()-2)) ;
         masm.orptr(boxReg, Address (tmpReg, ObjectMonitor::cxq_offset_in_bytes()-2)) ;
         masm.jccb  (Assembler::notZero, CheckSucc) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         masm.jmpb  (DONE_LABEL) ;
      }

      // The Following code fragment (EmitSync & 65536) improves the performance of
      // contended applications and contended synchronization microbenchmarks.
      // Unfortunately the emission of the code - even though not executed - causes regressions
      // in scimark and jetstream, evidently because of $ effects.  Replacing the code
      // with an equal number of never-executed NOPs results in the same regression.
      // We leave it off by default.

      if ((EmitSync & 65536) != 0) {
         Label LSuccess, LGoSlowPath ;

         masm.bind  (CheckSucc) ;

         // Optional pre-test ... it's safe to elide this
         if ((EmitSync & 16) == 0) {
            masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
            masm.jccb  (Assembler::zero, LGoSlowPath) ;
         }

         // We have a classic Dekker-style idiom:
         //    ST m->_owner = 0 ; MEMBAR; LD m->_succ
         // There are a number of ways to implement the barrier:
         // (1) lock:andl &m->_owner, 0
         //     is fast, but mask doesn't currently support the "ANDL M,IMM32" form.
         //     LOCK: ANDL [ebx+Offset(_Owner)-2], 0
         //     Encodes as 81 31 OFF32 IMM32 or 83 63 OFF8 IMM8
         // (2) If supported, an explicit MFENCE is appealing.
         //     In older IA32 processors MFENCE is slower than lock:add or xchg
         //     particularly if the write-buffer is full as might be the case if
         //     if stores closely precede the fence or fence-equivalent instruction.
         //     In more modern implementations MFENCE appears faster, however.
         // (3) In lieu of an explicit fence, use lock:addl to the top-of-stack
         //     The $lines underlying the top-of-stack should be in M-state.
         //     The locked add instruction is serializing, of course.
         // (4) Use xchg, which is serializing
         //     mov boxReg, 0; xchgl boxReg, [tmpReg + Offset(_owner)-2] also works
         // (5) ST m->_owner = 0 and then execute lock:orl &m->_succ, 0.
         //     The integer condition codes will tell us if succ was 0.
         //     Since _succ and _owner should reside in the same $line and
         //     we just stored into _owner, it's likely that the $line
         //     remains in M-state for the lock:orl.
         //
         // We currently use (3), although it's likely that switching to (2)
         // is correct for the future.

         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), NULL_WORD) ;
         if (os::is_MP()) {
            if (VM_Version::supports_sse2() && 1 == FenceInstruction) {
              masm.mfence();
            } else {
              masm.lock () ; masm.addptr(Address(rsp, 0), 0) ;
            }
         }
         // Ratify _succ remains non-null
         masm.cmpptr(Address (tmpReg, ObjectMonitor::succ_offset_in_bytes()-2), 0) ;
         masm.jccb  (Assembler::notZero, LSuccess) ;

         masm.xorptr(boxReg, boxReg) ;                  // box is really EAX
         if (os::is_MP()) { masm.lock(); }
         masm.cmpxchgptr(rsp, Address(tmpReg, ObjectMonitor::owner_offset_in_bytes()-2));
         masm.jccb  (Assembler::notEqual, LSuccess) ;
         // Since we're low on registers we installed rsp as a placeholder in _owner.
         // Now install Self over rsp.  This is safe as we're transitioning from
         // non-null to non-null
         masm.get_thread (boxReg) ;
         masm.movptr(Address (tmpReg, ObjectMonitor::owner_offset_in_bytes()-2), boxReg) ;
         // Intentional fall-through into LGoSlowPath ...

         masm.bind  (LGoSlowPath) ;
         masm.orptr(boxReg, 1) ;                      // set ICC.ZF=0 to indicate failure
         masm.jmpb  (DONE_LABEL) ;

         masm.bind  (LSuccess) ;
         masm.xorptr(boxReg, boxReg) ;                 // set ICC.ZF=1 to indicate success
         masm.jmpb  (DONE_LABEL) ;
      }

      masm.bind (Stacked) ;
      // It's not inflated and it's not recursively stack-locked and it's not biased.
      // It must be stack-locked.
      // Try to reset the header to displaced header.
      // The "box" value on the stack is stable, so we can reload
      // and be assured we observe the same value as above.
      masm.movptr(tmpReg, Address(boxReg, 0)) ;
      if (os::is_MP()) {   masm.lock();    }
      masm.cmpxchgptr(tmpReg, Address(objReg, 0)); // Uses EAX which is box
      // Intentional fall-through into DONE_LABEL


      // DONE_LABEL is a hot target - we'd really like to place it at the
      // start of cache line by padding with NOPs.
      // See the AMD and Intel software optimization manuals for the
      // most efficient "long" NOP encodings.
      // Unfortunately none of our alignment mechanisms suffice.
      // In the default configuration ((EmitSync & 65536) == 0) any jccb
      // to CheckSucc emitted above lands here, just before DONE_LABEL.
      if ((EmitSync & 65536) == 0) {
         masm.bind (CheckSucc) ;
      }
      masm.bind(DONE_LABEL);

      // Avoid branch to branch on AMD processors
      if (EmitSync & 32768) { masm.nop() ; }
    }
  %}
3705
3706  enc_class enc_String_Compare() %{
3707    Label ECX_GOOD_LABEL, LENGTH_DIFF_LABEL,
3708          POP_LABEL, DONE_LABEL, CONT_LABEL,
3709          WHILE_HEAD_LABEL;
3710    MacroAssembler masm(&cbuf);
3711
3712    // Get the first character position in both strings
3713    //         [8] char array, [12] offset, [16] count
3714    int value_offset  = java_lang_String::value_offset_in_bytes();
3715    int offset_offset = java_lang_String::offset_offset_in_bytes();
3716    int count_offset  = java_lang_String::count_offset_in_bytes();
3717    int base_offset   = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3718
3719    masm.movptr(rax, Address(rsi, value_offset));
3720    masm.movl(rcx, Address(rsi, offset_offset));
3721    masm.lea(rax, Address(rax, rcx, Address::times_2, base_offset));
3722    masm.movptr(rbx, Address(rdi, value_offset));
3723    masm.movl(rcx, Address(rdi, offset_offset));
3724    masm.lea(rbx, Address(rbx, rcx, Address::times_2, base_offset));
3725
3726    // Compute the minimum of the string lengths(rsi) and the
3727    // difference of the string lengths (stack)
3728
3729
3730    if (VM_Version::supports_cmov()) {
3731      masm.movl(rdi, Address(rdi, count_offset));
3732      masm.movl(rsi, Address(rsi, count_offset));
3733      masm.movl(rcx, rdi);
3734      masm.subl(rdi, rsi);
3735      masm.push(rdi);
3736      masm.cmovl(Assembler::lessEqual, rsi, rcx);
3737    } else {
3738      masm.movl(rdi, Address(rdi, count_offset));
3739      masm.movl(rcx, Address(rsi, count_offset));
3740      masm.movl(rsi, rdi);
3741      masm.subl(rdi, rcx);
3742      masm.push(rdi);
3743      masm.jcc(Assembler::lessEqual, ECX_GOOD_LABEL);
3744      masm.movl(rsi, rcx);
3745      // rsi holds min, rcx is unused
3746    }
3747
3748    // Is the minimum length zero?
3749    masm.bind(ECX_GOOD_LABEL);
3750    masm.testl(rsi, rsi);
3751    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3752
3753    // Load first characters
3754    masm.load_unsigned_word(rcx, Address(rbx, 0));
3755    masm.load_unsigned_word(rdi, Address(rax, 0));
3756
3757    // Compare first characters
3758    masm.subl(rcx, rdi);
3759    masm.jcc(Assembler::notZero,  POP_LABEL);
3760    masm.decrementl(rsi);
3761    masm.jcc(Assembler::zero, LENGTH_DIFF_LABEL);
3762
3763    {
3764      // Check after comparing first character to see if strings are equivalent
3765      Label LSkip2;
3766      // Check if the strings start at same location
3767      masm.cmpptr(rbx,rax);
3768      masm.jcc(Assembler::notEqual, LSkip2);
3769
3770      // Check if the length difference is zero (from stack)
3771      masm.cmpl(Address(rsp, 0), 0x0);
3772      masm.jcc(Assembler::equal,  LENGTH_DIFF_LABEL);
3773
3774      // Strings might not be equivalent
3775      masm.bind(LSkip2);
3776    }
3777
3778    // Shift rax, and rbx, to the end of the arrays, negate min
3779    masm.lea(rax, Address(rax, rsi, Address::times_2, 2));
3780    masm.lea(rbx, Address(rbx, rsi, Address::times_2, 2));
3781    masm.negl(rsi);
3782
3783    // Compare the rest of the characters
3784    masm.bind(WHILE_HEAD_LABEL);
3785    masm.load_unsigned_word(rcx, Address(rbx, rsi, Address::times_2, 0));
3786    masm.load_unsigned_word(rdi, Address(rax, rsi, Address::times_2, 0));
3787    masm.subl(rcx, rdi);
3788    masm.jcc(Assembler::notZero, POP_LABEL);
3789    masm.incrementl(rsi);
3790    masm.jcc(Assembler::notZero, WHILE_HEAD_LABEL);
3791
3792    // Strings are equal up to min length.  Return the length difference.
3793    masm.bind(LENGTH_DIFF_LABEL);
3794    masm.pop(rcx);
3795    masm.jmp(DONE_LABEL);
3796
3797    // Discard the stored length difference
3798    masm.bind(POP_LABEL);
3799    masm.addptr(rsp, 4);
3800       
3801    // That's it
3802    masm.bind(DONE_LABEL);
3803  %}
3804
3805  enc_class enc_Array_Equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result) %{
        // char[] equality: $result := 1 if both operands are the same oop, or
        // have equal lengths and equal contents; 0 otherwise.  An odd trailing
        // char is compared first, then the rest in 4-byte (2-char) chunks.
        // NOTE: ary1Reg/ary2Reg are clobbered below as scratch in the loop.
3806    Label TRUE_LABEL, FALSE_LABEL, DONE_LABEL, COMPARE_LOOP_HDR, COMPARE_LOOP;
3807    MacroAssembler masm(&cbuf);
3808
3809    Register ary1Reg   = as_Register($ary1$$reg);
3810    Register ary2Reg   = as_Register($ary2$$reg);
3811    Register tmp1Reg   = as_Register($tmp1$$reg);
3812    Register tmp2Reg   = as_Register($tmp2$$reg);
3813    Register resultReg = as_Register($result$$reg);
3814
3815    int length_offset  = arrayOopDesc::length_offset_in_bytes();
3816    int base_offset    = arrayOopDesc::base_offset_in_bytes(T_CHAR);
3817
3818    // Check the input args: same oop => equal; either NULL => not equal
3819    masm.cmpl(ary1Reg, ary2Reg);
3820    masm.jcc(Assembler::equal, TRUE_LABEL);
3821    masm.testl(ary1Reg, ary1Reg);
3822    masm.jcc(Assembler::zero, FALSE_LABEL);
3823    masm.testl(ary2Reg, ary2Reg);
3824    masm.jcc(Assembler::zero, FALSE_LABEL);
3825
3826    // Check the lengths; both zero-length => equal
3827    masm.movl(tmp2Reg, Address(ary1Reg, length_offset));
3828    masm.movl(resultReg, Address(ary2Reg, length_offset));
3829    masm.cmpl(tmp2Reg, resultReg);
3830    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3831    masm.testl(resultReg, resultReg);
3832    masm.jcc(Assembler::zero, TRUE_LABEL);
3833
3834    // Get the number of 4 byte vectors to compare
3835    masm.shrl(resultReg, 1);   // resultReg = length (chars) / 2
3836
3837    // Check for odd-length arrays (tmp2Reg still holds the length)
3838    masm.andl(tmp2Reg, 1);
3839    masm.testl(tmp2Reg, tmp2Reg);
3840    masm.jcc(Assembler::zero, COMPARE_LOOP_HDR);
3841
3842    // Compare 2-byte "tail" at end of arrays
3843    masm.load_unsigned_word(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3844    masm.load_unsigned_word(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3845    masm.cmpl(tmp1Reg, tmp2Reg);
3846    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3847    masm.testl(resultReg, resultReg);
3848    masm.jcc(Assembler::zero, TRUE_LABEL);   // length was 1: only the tail existed
3849
3850    // Setup compare loop
3851    masm.bind(COMPARE_LOOP_HDR);
3852    // Shift tmp1Reg and tmp2Reg to the last 4-byte boundary of the arrays
3853    masm.leal(tmp1Reg, Address(ary1Reg, resultReg, Address::times_4, base_offset));
3854    masm.leal(tmp2Reg, Address(ary2Reg, resultReg, Address::times_4, base_offset));
3855    masm.negl(resultReg);   // count up from -chunks toward zero
3856
3857    // 4-byte-wide compare loop (ary1Reg/ary2Reg reused as data scratch)
3858    masm.bind(COMPARE_LOOP);
3859    masm.movl(ary1Reg, Address(tmp1Reg, resultReg, Address::times_4, 0));
3860    masm.movl(ary2Reg, Address(tmp2Reg, resultReg, Address::times_4, 0));
3861    masm.cmpl(ary1Reg, ary2Reg);
3862    masm.jcc(Assembler::notEqual, FALSE_LABEL);
3863    masm.increment(resultReg);
3864    masm.jcc(Assembler::notZero, COMPARE_LOOP);
3865
3866    masm.bind(TRUE_LABEL);
3867    masm.movl(resultReg, 1);   // return true
3868    masm.jmp(DONE_LABEL);
3869
3870    masm.bind(FALSE_LABEL);
3871    masm.xorl(resultReg, resultReg); // return false
3872
3873    // That's it
3874    masm.bind(DONE_LABEL);
3875  %}
3876
3877  enc_class enc_pop_rdx() %{
3878    emit_opcode(cbuf,0x5A);            // POP EDX
3879  %}
3880
3881  enc_class enc_rethrow() %{
        // Tail-jump (JMP rel32) to the shared rethrow stub.  The displacement
        // is PC-relative, hence the extra -4 for the 4-byte immediate itself.
3882    cbuf.set_inst_mark();           // attach the relocation below to this instruction
3883    emit_opcode(cbuf, 0xE9);        // jmp    entry
3884    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.code_end())-4,
3885                   runtime_call_Relocation::spec(), RELOC_IMM32 );
3886  %}
3887
3888
3889  // Convert a double to an int.  Java semantics require we do complex
3890  // manglelations in the corner cases.  So we set the rounding mode to
3891  // 'zero', store the darned double down as an int, and reset the
3892  // rounding mode to 'nearest'.  The hardware throws an exception which
3893  // patches up the correct value directly to the stack.
3894  enc_class D2I_encoding( regD src ) %{
3895    // Flip to round-to-zero mode.  We attempted to allow invalid-op
3896    // exceptions here, so that a NAN or other corner-case value will
3897    // throw an exception (but normal values get converted at full speed).
3898    // However, I2C adapters and other float-stack manglers leave pending
3899    // invalid-op exceptions hanging.  We would have to clear them before
3900    // enabling them and that is more expensive than just testing for the
3901    // invalid value Intel stores down in the corner cases.
3902    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3903    emit_opcode(cbuf,0x2D);
3904    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3905    // Allocate a word
3906    emit_opcode(cbuf,0x83);            // SUB ESP,4
3907    emit_opcode(cbuf,0xEC);
3908    emit_d8(cbuf,0x04);
3909    // Encoding assumes a double has been pushed into FPR0.
3910    // Store down the double as an int, popping the FPU stack
3911    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
3912    emit_opcode(cbuf,0x1C);
3913    emit_d8(cbuf,0x24);
3914    // Restore the rounding mode; mask the exception
3915    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3916    emit_opcode(cbuf,0x2D);
3917    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3918        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3919        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3920
3921    // Load the converted int; adjust CPU stack
3922    emit_opcode(cbuf,0x58);       // POP EAX
3923    emit_opcode(cbuf,0x3D);       // CMP EAX,imm  -- 0x80000000 is the 'invalid' sentinel
3924    emit_d32   (cbuf,0x80000000); //         0x80000000
3925    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3926    emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2) + CALL rel32 (5)
3927    // Push src onto stack slow-path
3928    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3929    emit_d8    (cbuf,0xC0-1+$src$$reg );
3930    // CALL directly to the runtime
3931    cbuf.set_inst_mark();
3932    emit_opcode(cbuf,0xE8);       // Call into runtime
3933    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3934    // Carry on here...
3935  %}
3936
3937  enc_class D2L_encoding( regD src ) %{
        // Double in FPR0 -> long in EDX:EAX with Java corner-case semantics.
        // Same scheme as D2I_encoding above, but a 64-bit FISTP; the 'invalid'
        // sentinel 0x8000000000000000 shows up as EDX==0x80000000 && EAX==0.
3938    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3939    emit_opcode(cbuf,0x2D);
3940    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3941    // Allocate a word
3942    emit_opcode(cbuf,0x83);            // SUB ESP,8
3943    emit_opcode(cbuf,0xEC);
3944    emit_d8(cbuf,0x08);
3945    // Encoding assumes a double has been pushed into FPR0.
3946    // Store down the double as a long, popping the FPU stack
3947    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3948    emit_opcode(cbuf,0x3C);
3949    emit_d8(cbuf,0x24);
3950    // Restore the rounding mode; mask the exception
3951    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3952    emit_opcode(cbuf,0x2D);
3953    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3954        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3955        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3956
3957    // Load the converted int; adjust CPU stack
3958    emit_opcode(cbuf,0x58);       // POP EAX
3959    emit_opcode(cbuf,0x5A);       // POP EDX
3960    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3961    emit_d8    (cbuf,0xFA);       // rdx
3962    emit_d32   (cbuf,0x80000000); //         0x80000000
3963    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3964    emit_d8    (cbuf,0x07+4);     // Size of slow_call (7) + TEST/JNE (4)
3965    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3966    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3967    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3968    emit_d8    (cbuf,0x07);       // Size of slow_call: FLD (2) + CALL rel32 (5)
3969    // Push src onto stack slow-path
3970    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3971    emit_d8    (cbuf,0xC0-1+$src$$reg );
3972    // CALL directly to the runtime
3973    cbuf.set_inst_mark();
3974    emit_opcode(cbuf,0xE8);       // Call into runtime
3975    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3976    // Carry on here...
3977  %}
3978
3979  enc_class X2L_encoding( regX src ) %{
        // Float in XMM $src -> long in EDX:EAX.  Spills the float through the
        // CPU stack onto the x87 FPU, FISTPs with truncating rounding, then
        // checks for the 'invalid' pattern (EDX==0x80000000 && EAX==0); on a
        // hit it reloads $src onto the FPU and calls StubRoutines::d2l_wrapper()
        // for Java-correct corner cases.
3980    // Allocate a word
3981    emit_opcode(cbuf,0x83);      // SUB ESP,8
3982    emit_opcode(cbuf,0xEC);
3983    emit_d8(cbuf,0x08);
3984
3985    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
3986    emit_opcode  (cbuf, 0x0F );
3987    emit_opcode  (cbuf, 0x11 );
3988    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
3989
3990    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
3991    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
3992
3993    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
3994    emit_opcode(cbuf,0x2D);
3995    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3996
3997    // Encoding assumes a double has been pushed into FPR0.
3998    // Store down the double as a long, popping the FPU stack
3999    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
4000    emit_opcode(cbuf,0x3C);
4001    emit_d8(cbuf,0x24);
4002
4003    // Restore the rounding mode; mask the exception
4004    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
4005    emit_opcode(cbuf,0x2D);
4006    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4007      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4008      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4009
4010    // Load the converted int; adjust CPU stack
4011    emit_opcode(cbuf,0x58);      // POP EAX
4012
4013    emit_opcode(cbuf,0x5A);      // POP EDX
4014
4015    emit_opcode(cbuf,0x81);      // CMP EDX,imm
4016    emit_d8    (cbuf,0xFA);      // rdx
4017    emit_d32   (cbuf,0x80000000);//         0x80000000
4018
4019    emit_opcode(cbuf,0x75);      // JNE around_slow_call
4020    emit_d8    (cbuf,0x13+4);    // Size of slow_call (0x13) + TEST/JNE (4)
4021
4022    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
4023    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
4024
4025    emit_opcode(cbuf,0x75);      // JNE around_slow_call
4026    emit_d8    (cbuf,0x13);      // Size of slow_call: SUB+MOVSS+FLD_S+ADD+CALL below
4027
4028    // Slow path: put $src back onto the FPU stack for the wrapper.
4029    // Allocate a word
4029    emit_opcode(cbuf,0x83);      // SUB ESP,4
4030    emit_opcode(cbuf,0xEC);
4031    emit_d8(cbuf,0x04);
4032
4033    emit_opcode  (cbuf, 0xF3 );  // MOVSS [ESP], src
4034    emit_opcode  (cbuf, 0x0F );
4035    emit_opcode  (cbuf, 0x11 );
4036    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4037
4038    emit_opcode(cbuf,0xD9 );     // FLD_S [ESP]
4039    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4040
4041    emit_opcode(cbuf,0x83);      // ADD ESP,4
4042    emit_opcode(cbuf,0xC4);
4043    emit_d8(cbuf,0x04);
4044
4045    // CALL directly to the runtime
4046    cbuf.set_inst_mark();
4047    emit_opcode(cbuf,0xE8);       // Call into runtime
4048    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4049    // Carry on here...
4050  %}
4051
4052  enc_class XD2L_encoding( regXD src ) %{
        // Double in XMM $src -> long in EDX:EAX.  Same scheme as X2L_encoding
        // above, but using MOVSD/FLD_D and an 8-byte stack temp.
4053    // Allocate a word
4054    emit_opcode(cbuf,0x83);      // SUB ESP,8
4055    emit_opcode(cbuf,0xEC);
4056    emit_d8(cbuf,0x08);
4057
4058    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
4059    emit_opcode  (cbuf, 0x0F );
4060    emit_opcode  (cbuf, 0x11 );
4061    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4062
4063    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
4064    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4065
4066    emit_opcode(cbuf,0xD9);      // FLDCW  trunc
4067    emit_opcode(cbuf,0x2D);
4068    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
4069
4070    // Encoding assumes a double has been pushed into FPR0.
4071    // Store down the double as a long, popping the FPU stack
4072    emit_opcode(cbuf,0xDF);      // FISTP [ESP]
4073    emit_opcode(cbuf,0x3C);
4074    emit_d8(cbuf,0x24);
4075
4076    // Restore the rounding mode; mask the exception
4077    emit_opcode(cbuf,0xD9);      // FLDCW   std/24-bit mode
4078    emit_opcode(cbuf,0x2D);
4079    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
4080      ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
4081      : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
4082
4083    // Load the converted int; adjust CPU stack
4084    emit_opcode(cbuf,0x58);      // POP EAX
4085
4086    emit_opcode(cbuf,0x5A);      // POP EDX
4087
4088    emit_opcode(cbuf,0x81);      // CMP EDX,imm
4089    emit_d8    (cbuf,0xFA);      // rdx
4090    emit_d32   (cbuf,0x80000000); //         0x80000000
4091
4092    emit_opcode(cbuf,0x75);      // JNE around_slow_call
4093    emit_d8    (cbuf,0x13+4);    // Size of slow_call (0x13) + TEST/JNE (4)
4094
4095    emit_opcode(cbuf,0x85);      // TEST EAX,EAX
4096    emit_opcode(cbuf,0xC0);      // 2/rax,/rax,
4097
4098    emit_opcode(cbuf,0x75);      // JNE around_slow_call
4099    emit_d8    (cbuf,0x13);      // Size of slow_call: SUB+MOVSD+FLD_D+ADD+CALL below
4100
4101    // Push src onto stack slow-path
4102    // Allocate a word
4103    emit_opcode(cbuf,0x83);      // SUB ESP,8
4104    emit_opcode(cbuf,0xEC);
4105    emit_d8(cbuf,0x08);
4106
4107    emit_opcode  (cbuf, 0xF2 );  // MOVSD [ESP], src
4108    emit_opcode  (cbuf, 0x0F );
4109    emit_opcode  (cbuf, 0x11 );
4110    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4111
4112    emit_opcode(cbuf,0xDD );     // FLD_D [ESP]
4113    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4114
4115    emit_opcode(cbuf,0x83);      // ADD ESP,8
4116    emit_opcode(cbuf,0xC4);
4117    emit_d8(cbuf,0x08);
4118
4119    // CALL directly to the runtime
4120    cbuf.set_inst_mark();
4121    emit_opcode(cbuf,0xE8);      // Call into runtime
4122    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4123    // Carry on here...
4124  %}
4125
4126  enc_class D2X_encoding( regX dst, regD src ) %{
        // Double on the FPU stack -> float in XMM $dst, rounding to single
        // precision via a 4-byte store/reload through the CPU stack.
4127    // Allocate a word
4128    emit_opcode(cbuf,0x83);            // SUB ESP,4
4129    emit_opcode(cbuf,0xEC);
4130    emit_d8(cbuf,0x04);
4131    int pop = 0x02;                    // D9 /2 = FST_S (no pop): src already in ST(0)
4132    if ($src$$reg != FPR1L_enc) {
4133      emit_opcode( cbuf, 0xD9 );       // FLD    ST(i-1)
4134      emit_d8( cbuf, 0xC0-1+$src$$reg );
4135      pop = 0x03;                      // D9 /3 = FSTP_S: pop the copy just pushed
4136    }
4137    store_to_stackslot( cbuf, 0xD9, pop, 0 ); // FST<P>_S  [ESP]
4138
4139    emit_opcode  (cbuf, 0xF3 );        // MOVSS dst(xmm), [ESP]
4140    emit_opcode  (cbuf, 0x0F );
4141    emit_opcode  (cbuf, 0x10 );
4142    encode_RegMem(cbuf, $dst$$reg, ESP_enc, 0x4, 0, 0, false);
4143
4144    emit_opcode(cbuf,0x83);            // ADD ESP,4
4145    emit_opcode(cbuf,0xC4);
4146    emit_d8(cbuf,0x04);
4147    // Carry on here...
4148  %}
4149
4150  enc_class FX2I_encoding( regX src, eRegI dst ) %{
        // Shared tail of the SSE float/double -> int conversions; $primary is
        // set for the double (SD) variant.  The convert's opcode bytes are
        // emitted before this encoding runs -- the first line below supplies
        // only its mod/rm byte.  On the 0x80000000 'invalid' result, the
        // value is re-loaded through memory onto the FPU stack and
        // StubRoutines::d2i_wrapper() fixes up the Java corner cases.
4151    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
4152
4153    // Compare the result to see if we need to go to the slow path
4154    emit_opcode(cbuf,0x81);       // CMP dst,imm
4155    emit_rm    (cbuf,0x3,0x7,$dst$$reg);
4156    emit_d32   (cbuf,0x80000000); //         0x80000000
4157
4158    emit_opcode(cbuf,0x75);       // JNE around_slow_call
4159    emit_d8    (cbuf,0x13);       // Size of slow_call
4160    // Store xmm to a temp memory
4161    // location and push it onto stack.
4162
4163    emit_opcode(cbuf,0x83);  // SUB ESP,4 (float) or ESP,8 (double)
4164    emit_opcode(cbuf,0xEC);
4165    emit_d8(cbuf, $primary ? 0x8 : 0x4);
4166
4167    emit_opcode  (cbuf, $primary ? 0xF2 : 0xF3 );   // MOVSS/MOVSD [ESP], xmm
4168    emit_opcode  (cbuf, 0x0F );
4169    emit_opcode  (cbuf, 0x11 );
4170    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4171
4172    emit_opcode(cbuf, $primary ? 0xDD : 0xD9 );      // FLD_D/FLD_S [ESP]
4173    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4174
4175    emit_opcode(cbuf,0x83);    // ADD ESP,4 (float) or ESP,8 (double)
4176    emit_opcode(cbuf,0xC4);
4177    emit_d8(cbuf, $primary ? 0x8 : 0x4);
4178
4179    // CALL directly to the runtime
4180    cbuf.set_inst_mark();
4181    emit_opcode(cbuf,0xE8);       // Call into runtime
4182    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.code_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
4183
4184    // Carry on here...
4185  %}
4186
4187  enc_class X2D_encoding( regD dst, regX src ) %{
        // Float in XMM $src -> double on the FPU stack: spill to a 4-byte
        // stack temp, then FLD_S it (x87 widens the single on load).
4188    // Allocate a word
4189    emit_opcode(cbuf,0x83);     // SUB ESP,4
4190    emit_opcode(cbuf,0xEC);
4191    emit_d8(cbuf,0x04);
4192
4193    emit_opcode  (cbuf, 0xF3 ); // MOVSS [ESP], xmm
4194    emit_opcode  (cbuf, 0x0F );
4195    emit_opcode  (cbuf, 0x11 );
4196    encode_RegMem(cbuf, $src$$reg, ESP_enc, 0x4, 0, 0, false);
4197
4198    emit_opcode(cbuf,0xD9 );    // FLD_S [ESP]
4199    encode_RegMem(cbuf, 0x0, ESP_enc, 0x4, 0, 0, false);
4200
4201    emit_opcode(cbuf,0x83);     // ADD ESP,4
4202    emit_opcode(cbuf,0xC4);
4203    emit_d8(cbuf,0x04);
4204
4205    // Carry on here...
4206  %}
4207
4208  enc_class AbsXF_encoding(regX dst) %{
4209    address signmask_address=(address)float_signmask_pool;
4210    // ANDPS  $dst,[signmask]  (0F 54) -- clear the sign bit => fabs
4211    emit_opcode(cbuf, 0x0F);
4212    emit_opcode(cbuf, 0x54);
4213    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, rm=101 => [disp32] absolute
4214    emit_d32(cbuf, (int)signmask_address);
4215  %}
4216
4217  enc_class AbsXD_encoding(regXD dst) %{
4218    address signmask_address=(address)double_signmask_pool;
4219    // ANDPD  $dst,[signmask]  (66 0F 54) -- clear the sign bit => fabs
4220    emit_opcode(cbuf, 0x66);
4221    emit_opcode(cbuf, 0x0F);
4222    emit_opcode(cbuf, 0x54);
4223    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, rm=101 => [disp32] absolute
4224    emit_d32(cbuf, (int)signmask_address);
4225  %}
4226
4227  enc_class NegXF_encoding(regX dst) %{
4228    address signmask_address=(address)float_signflip_pool;
4229    // XORPS  $dst,[signflip]  (0F 57) -- flip the sign bit => negate
4230    emit_opcode(cbuf, 0x0F);
4231    emit_opcode(cbuf, 0x57);
4232    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, rm=101 => [disp32] absolute
4233    emit_d32(cbuf, (int)signmask_address);
4234  %}
4235
4236  enc_class NegXD_encoding(regXD dst) %{
4237    address signmask_address=(address)double_signflip_pool;
4238    // XORPD  $dst,[signflip]  (66 0F 57) -- flip the sign bit => negate
4239    emit_opcode(cbuf, 0x66);
4240    emit_opcode(cbuf, 0x0F);
4241    emit_opcode(cbuf, 0x57);
4242    emit_rm(cbuf, 0x0, $dst$$reg, 0x5);   // mod=00, rm=101 => [disp32] absolute
4243    emit_d32(cbuf, (int)signmask_address);
4244  %}
4245
4246  enc_class FMul_ST_reg( eRegF src1 ) %{
4247    // Operand was loaded from memory into fp ST (stack top)
4248    // FMUL   ST,ST($src1)  /* D8 C8+i */
4249    emit_opcode(cbuf, 0xD8);
4250    emit_opcode(cbuf, 0xC8 + $src1$$reg);
4251  %}
4252
4253  enc_class FAdd_ST_reg( eRegF src2 ) %{
4254    // FADD   ST,ST(src2)  /* D8 C0+i */ -- non-popping, despite the enc name
4255    emit_opcode(cbuf, 0xD8);
4256    emit_opcode(cbuf, 0xC0 + $src2$$reg);
4257    //could use FADDP  src2,fpST  /* DE C0+i */
4258  %}
4259
4260  enc_class FAddP_reg_ST( eRegF src2 ) %{
4261    // FADDP  src2,ST  /* DE C0+i */ -- add ST into ST(src2), then pop
4262    emit_opcode(cbuf, 0xDE);
4263    emit_opcode(cbuf, 0xC0 + $src2$$reg);
4264  %}
4265
4266  enc_class subF_divF_encode( eRegF src1, eRegF src2) %{
4267    // Operand has been loaded into fp ST (stack top)
4268      // FSUB   ST,ST($src1)  /* D8 E0+i */
4269      emit_opcode(cbuf, 0xD8);
4270      emit_opcode(cbuf, 0xE0 + $src1$$reg);
4271
4272      // FDIV   ST,ST($src2)  /* D8 F0+i */
4273      emit_opcode(cbuf, 0xD8);
4274      emit_opcode(cbuf, 0xF0 + $src2$$reg);
4275  %}
4276
4277  enc_class MulFAddF (eRegF src1, eRegF src2) %{
4278    // Operand was loaded from memory into fp ST (stack top)
4279    // FADD   ST,ST($src1)  /* D8 C0+i */
4280    emit_opcode(cbuf, 0xD8);
4281    emit_opcode(cbuf, 0xC0 + $src1$$reg);
4282
4283    // FMUL   ST,ST($src2)  /* D8 C8+i */
4284    emit_opcode(cbuf, 0xD8);
4285    emit_opcode(cbuf, 0xC8 + $src2$$reg);
4286  %}
4287
4288
4289  enc_class MulFAddFreverse (eRegF src1, eRegF src2) %{
4290    // Operand was loaded from memory into fp ST (stack top)
4291    // FADD   ST,ST($src1)  /* D8 C0+i */
4292    emit_opcode(cbuf, 0xD8);
4293    emit_opcode(cbuf, 0xC0 + $src1$$reg);
4294
4295    // FMULP  src2,ST  /* DE C8+i */ -- multiply into ST(src2), then pop
4296    emit_opcode(cbuf, 0xDE);
4297    emit_opcode(cbuf, 0xC8 + $src2$$reg);
4298  %}
4299
4300  enc_class enc_membar_acquire %{
        // Intentionally emits no code -- acquire needs no instruction here.
        // NOTE(review): the comment below refers to SPARC/TSO; verify it still
        // reflects the intended IA32 reasoning.
4301    // Doug Lea believes this is not needed with current Sparcs and TSO.
4302    // MacroAssembler masm(&cbuf);
4303    // masm.membar();
4304  %}
4305
4306  enc_class enc_membar_release %{
        // Intentionally emits no code -- release needs no instruction here.
        // NOTE(review): the comment below refers to SPARC/TSO; verify it still
        // reflects the intended IA32 reasoning.
4307    // Doug Lea believes this is not needed with current Sparcs and TSO.
4308    // MacroAssembler masm(&cbuf);
4309    // masm.membar();
4310  %}
4311
4312  enc_class enc_membar_volatile %{
        // Fence used after a volatile store: orders the store ahead of
        // subsequent loads (StoreLoad) and subsequent stores (StoreStore).
4313    MacroAssembler masm(&cbuf);
4314    masm.membar(Assembler::Membar_mask_bits(Assembler::StoreLoad |
4315                                            Assembler::StoreStore));
4316  %}
4317
4318  // Atomically load the volatile long
4319  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
        // Atomic 64-bit load via the x87 FPU: FILD_D $mem then FISTP_D $dst.
4320    emit_opcode(cbuf,0xDF);                 // FILD_D $mem  (DF /5)
4321    int rm_byte_opcode = 0x05;              // /5 selects FILD m64int
4322    int base     = $mem$$base;
4323    int index    = $mem$$index;
4324    int scale    = $mem$$scale;
4325    int displace = $mem$$disp;
4326    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4327    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4328    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );  // FISTP_D $dst (DF /7): pop into the stack slot
4329  %}
4330
4331  enc_class enc_loadLX_volatile( memory mem, stackSlotL dst, regXD tmp ) %{
        // Atomic 64-bit load via XMM: one 8-byte XMM load from $mem into $tmp,
        // then one 8-byte XMM store into stack slot $dst.
4332    { // Atomic long load
4333      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4334      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4335      emit_opcode(cbuf,0x0F);
4336      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4337      int base     = $mem$$base;
4338      int index    = $mem$$index;
4339      int scale    = $mem$$scale;
4340      int displace = $mem$$disp;
4341      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4342      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4343    }
4344    { // MOVSD $dst,$tmp ! atomic long store
4345      emit_opcode(cbuf,0xF2);
4346      emit_opcode(cbuf,0x0F);
4347      emit_opcode(cbuf,0x11);
4348      int base     = $dst$$base;
4349      int index    = $dst$$index;
4350      int scale    = $dst$$scale;
4351      int displace = $dst$$disp;
4352      bool disp_is_oop = $dst->disp_is_oop(); // disp-as-oop when working with static globals
4353      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4354    }
4355  %}
4356
4357  enc_class enc_loadLX_reg_volatile( memory mem, eRegL dst, regXD tmp ) %{
        // Atomic 64-bit load via XMM, destination in a register pair: load all
        // 8 bytes of $mem into $tmp, then split into $dst.lo / $dst.hi with
        // MOVD + PSRLQ(32) + MOVD.
4358    { // Atomic long load
4359      // UseXmmLoadAndClearUpper ? movsd $tmp,$mem : movlpd $tmp,$mem
4360      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4361      emit_opcode(cbuf,0x0F);
4362      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4363      int base     = $mem$$base;
4364      int index    = $mem$$index;
4365      int scale    = $mem$$scale;
4366      int displace = $mem$$disp;
4367      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4368      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4369    }
4370    { // MOVD $dst.lo,$tmp
4371      emit_opcode(cbuf,0x66);
4372      emit_opcode(cbuf,0x0F);
4373      emit_opcode(cbuf,0x7E);
4374      emit_rm(cbuf, 0x3, $tmp$$reg, $dst$$reg);
4375    }
4376    { // PSRLQ $tmp,32 -- bring the high half down to bits 31..0
4377      emit_opcode(cbuf,0x66);
4378      emit_opcode(cbuf,0x0F);
4379      emit_opcode(cbuf,0x73);
4380      emit_rm(cbuf, 0x3, 0x02, $tmp$$reg);
4381      emit_d8(cbuf, 0x20);
4382    }
4383    { // MOVD $dst.hi,$tmp
4384      emit_opcode(cbuf,0x66);
4385      emit_opcode(cbuf,0x0F);
4386      emit_opcode(cbuf,0x7E);
4387      emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg));
4388    }
4389  %}
4390
4391  // Volatile Store Long.  Must be atomic, so move it into
4392  // the FP TOS and then do a 64-bit FIST.  Has to probe the
4393  // target address before the store (for null-ptr checks)
4394  // so the memory operand is used twice in the encoding.
4395  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
4396    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );  // FILD_D $src (DF /5): load the long onto the FPU
4397    cbuf.set_inst_mark();            // Mark start of FIST in case $mem has an oop
4398    emit_opcode(cbuf,0xDF);                 // FISTP_D $mem (DF /7): atomic 8-byte store, pops the FPU
4399    int rm_byte_opcode = 0x07;
4400    int base     = $mem$$base;
4401    int index    = $mem$$index;
4402    int scale    = $mem$$scale;
4403    int displace = $mem$$disp;
4404    bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4405    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_is_oop);
4406  %}
4407
4408  enc_class enc_storeLX_volatile( memory mem, stackSlotL src, regXD tmp) %{
        // Atomic 64-bit store via XMM: stage the value from stack slot $src in
        // $tmp, then store all 8 bytes to $mem with a single MOVSD.
4409    { // Atomic long load
4410      // UseXmmLoadAndClearUpper ? movsd $tmp,[$src] : movlpd $tmp,[$src]
4411      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0xF2 : 0x66);
4412      emit_opcode(cbuf,0x0F);
4413      emit_opcode(cbuf,UseXmmLoadAndClearUpper ? 0x10 : 0x12);
4414      int base     = $src$$base;
4415      int index    = $src$$index;
4416      int scale    = $src$$scale;
4417      int displace = $src$$disp;
4418      bool disp_is_oop = $src->disp_is_oop(); // disp-as-oop when working with static globals
4419      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4420    }
4421    cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4422    { // MOVSD $mem,$tmp ! atomic long store
4423      emit_opcode(cbuf,0xF2);
4424      emit_opcode(cbuf,0x0F);
4425      emit_opcode(cbuf,0x11);
4426      int base     = $mem$$base;
4427      int index    = $mem$$index;
4428      int scale    = $mem$$scale;
4429      int displace = $mem$$disp;
4430      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4431      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4432    }
4433  %}
4434
4435  enc_class enc_storeLX_reg_volatile( memory mem, eRegL src, regXD tmp, regXD tmp2) %{
        // Atomic 64-bit store from a register pair: assemble the value in $tmp
        // (MOVD lo, MOVD hi, PUNPCKLDQ merges them), then one 8-byte MOVSD to $mem.
4436    { // MOVD $tmp,$src.lo
4437      emit_opcode(cbuf,0x66);
4438      emit_opcode(cbuf,0x0F);
4439      emit_opcode(cbuf,0x6E);
4440      emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
4441    }
4442    { // MOVD $tmp2,$src.hi
4443      emit_opcode(cbuf,0x66);
4444      emit_opcode(cbuf,0x0F);
4445      emit_opcode(cbuf,0x6E);
4446      emit_rm(cbuf, 0x3, $tmp2$$reg, HIGH_FROM_LOW($src$$reg));
4447    }
4448    { // PUNPCKLDQ $tmp,$tmp2 -- interleave: $tmp = hi:lo
4449      emit_opcode(cbuf,0x66);
4450      emit_opcode(cbuf,0x0F);
4451      emit_opcode(cbuf,0x62);
4452      emit_rm(cbuf, 0x3, $tmp$$reg, $tmp2$$reg);
4453    }
4454    cbuf.set_inst_mark();            // Mark start of MOVSD in case $mem has an oop
4455    { // MOVSD $mem,$tmp ! atomic long store
4456      emit_opcode(cbuf,0xF2);
4457      emit_opcode(cbuf,0x0F);
4458      emit_opcode(cbuf,0x11);
4459      int base     = $mem$$base;
4460      int index    = $mem$$index;
4461      int scale    = $mem$$scale;
4462      int displace = $mem$$disp;
4463      bool disp_is_oop = $mem->disp_is_oop(); // disp-as-oop when working with static globals
4464      encode_RegMem(cbuf, $tmp$$reg, base, index, scale, displace, disp_is_oop);
4465    }
4466  %}
4467
4468  // Safepoint Poll.  This polls the safepoint page, and causes an
4469  // exception if it is not readable. Unfortunately, it kills the condition code
4470  // in the process
4471// We currently use TESTL [spp],EDI
4472  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
4473
4474  enc_class Safepoint_Poll() %{
        // TESTL [polling_page],EDI -- reads the polling page; faults when the
        // page is made unreadable, forcing the thread to a safepoint (see the
        // comment above).  Clobbers EFLAGS.
4475    cbuf.relocate(cbuf.inst_mark(), relocInfo::poll_type, 0);  // tag as a poll site
4476    emit_opcode(cbuf,0x85);
4477    emit_rm (cbuf, 0x0, 0x7, 0x5);   // mod=00, reg=EDI(7), rm=101 => [disp32] absolute
4478    emit_d32(cbuf, (intptr_t)os::get_polling_page());
4479  %}
4480%}
4481
4482
4483//----------FRAME--------------------------------------------------------------
4484// Definition of frame structure and management information.
4485//
4486//  S T A C K   L A Y O U T    Allocators stack-slot number
4487//                             |   (to get allocators register number
4488//  G  Owned by    |        |  v    add OptoReg::stack0())
4489//  r   CALLER     |        |
4490//  o     |        +--------+      pad to even-align allocators stack-slot
4491//  w     V        |  pad0  |        numbers; owned by CALLER
4492//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
4493//  h     ^        |   in   |  5
4494//        |        |  args  |  4   Holes in incoming args owned by SELF
4495//  |     |        |        |  3
4496//  |     |        +--------+
4497//  V     |        | old out|      Empty on Intel, window on Sparc
4498//        |    old |preserve|      Must be even aligned.
4499//        |     SP-+--------+----> Matcher::_old_SP, even aligned
4500//        |        |   in   |  3   area for Intel ret address
4501//     Owned by    |preserve|      Empty on Sparc.
4502//       SELF      +--------+
4503//        |        |  pad2  |  2   pad to align old SP
4504//        |        +--------+  1
4505//        |        | locks  |  0
4506//        |        +--------+----> OptoReg::stack0(), even aligned
4507//        |        |  pad1  | 11   pad to align new SP
4508//        |        +--------+
4509//        |        |        | 10
4510//        |        | spills |  9   spills
4511//        V        |        |  8   (pad0 slot for callee)
4512//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
4513//        ^        |  out   |  7
4514//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
4515//     Owned by    +--------+
4516//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
4517//        |    new |preserve|      Must be even-aligned.
4518//        |     SP-+--------+----> Matcher::_new_SP, even aligned
4519//        |        |        |
4520//
4521// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
4522//         known from SELF's arguments and the Java calling convention.
4523//         Region 6-7 is determined per call site.
4524// Note 2: If the calling convention leaves holes in the incoming argument
4525//         area, those holes are owned by SELF.  Holes in the outgoing area
4526//         are owned by the CALLEE.  Holes should not be nessecary in the
4527//         incoming area, as the Java calling convention is completely under
4528//         the control of the AD file.  Doubles can be sorted and packed to
4529//         avoid holes.  Holes in the outgoing arguments may be necessary for
4530//         varargs C calling conventions.
4531// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
4532//         even aligned with pad0 as needed.
4533//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
4534//         region 6-11 is even aligned; it may be padded out more so that
4535//         the region from SP to FP meets the minimum stack alignment.
4536
4537frame %{
4538  // What direction does stack grow in (assumed to be same for C & Java)
  // Stacks grow towards lower addresses on x86.
  stack_direction(TOWARDS_LOW);

  // These two registers define part of the calling convention
  // between compiled code and the interpreter.
  inline_cache_reg(EAX);                // Inline Cache Register
  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter

  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
  cisc_spilling_operand_name(indOffset32);

  // Number of stack slots consumed by locking an object
  sync_stack_slots(1);

  // Compiled code's Frame Pointer
  // (compiled frames are ESP-relative; EBP is freed up as an allocatable register)
  frame_pointer(ESP);
  // Interpreter stores its frame pointer in a register which is
  // stored to the stack by I2CAdaptors.
  // I2CAdaptors convert from interpreted java to compiled java.
  interpreter_frame_pointer(EBP);

  // Stack alignment requirement
  // Alignment size in bytes (128-bit -> 16 bytes)
  stack_alignment(StackAlignmentInBytes);

  // Number of stack slots between incoming argument block and the start of
  // a new frame.  The PROLOG must add this many slots to the stack.  The
  // EPILOG must remove this many slots.  Intel needs one slot for
  // return address and one for rbp, (must save rbp)
  in_preserve_stack_slots(2+VerifyStackAtCalls);

  // Number of outgoing stack slots killed above the out_preserve_stack_slots
  // for calls to C.  Supports the var-args backing area for register parms.
  varargs_C_out_slots_killed(0);

  // The after-PROLOG location of the return address.  Location of
  // return address specifies a type (REG or STACK) and a number
  // representing the register number (i.e. - use a register name) or
  // stack slot.
  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
  // Otherwise, it is above the locks and verification slot and alignment word
  // (rounded up so the frame stays StackAlignmentInBytes-aligned).
  return_addr(STACK - 1 +
              round_to(1+VerifyStackAtCalls+
              Compile::current()->fixed_slots(),
              (StackAlignmentInBytes/wordSize)));
4583
  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  calling_convention %{
    // No difference between ingoing/outgoing just pass false
    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
  %}
4594
4595
  // Body of function which returns an integer array locating
  // arguments either in registers or in stack slots.  Passed an array
  // of ideal registers called "sig" and a "length" count.  Stack-slot
  // offsets are based on outgoing arguments, i.e. a CALLER setting up
  // arguments for a CALLEE.  Incoming stack arguments are
  // automatically biased by the preserve_stack_slots field above.
  c_calling_convention %{
    // This is obviously always outgoing
    (void) SharedRuntime::c_calling_convention(sig_bt, regs, length);
  %}
4606
  // Location of C & interpreter return values
  c_return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    // Tables indexed by ideal register opcode; lo is the first (or only)
    // word of the result, hi is the second word (OptoReg::Bad for
    // one-word results).  Longs come back in EDX:EAX, ints/pointers in
    // EAX, floating point on the FPU stack in FPR1.
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };

    // in SSE2+ mode we want to keep the FPU stack clean so pretend
    // that C functions return float and double results in XMM0.
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=2 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);

    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}
4622
  // Location of compiled Java return values.  Same tables as
  // c_return_value above, but a Java float result only needs UseSSE>=1
  // to live in XMM0 (doubles still require SSE2).
  return_value %{
    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
    if( ideal_reg == Op_RegD && UseSSE>=2 )
      return OptoRegPair(XMM0b_num,XMM0a_num);
    if( ideal_reg == Op_RegF && UseSSE>=1 )
      return OptoRegPair(OptoReg::Bad,XMM0a_num);
    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
  %}
4634
4635%}
4636
//----------ATTRIBUTES---------------------------------------------------------
//----------Operand Attributes-------------------------------------------------
op_attrib op_cost(0);        // Required cost attribute
//----------Instruction Attributes---------------------------------------------
ins_attrib ins_cost(100);       // Required cost attribute
ins_attrib ins_size(8);         // Required size attribute (in bits)
ins_attrib ins_pc_relative(0);  // Required PC Relative flag
ins_attrib ins_short_branch(0); // Required flag: is this instruction a
                                // non-matching short branch variant of some
                                // long branch?
ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
                                // specifies the alignment that some part of the instruction (not
                                // necessarily the start) requires.  If > 1, a compute_padding()
                                // function must be provided for the instruction
4652
//----------OPERANDS-----------------------------------------------------------
// Operand definitions must precede instruction definitions for correct parsing
// in the ADLC because operands constitute user defined types which are used in
// instruction definitions.

//----------Simple Operands----------------------------------------------------
// Immediate Operands
// Integer Immediate: any 32-bit integer constant
operand immI() %{
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for test vs zero
operand immI0() %{
  predicate(n->get_int() == 0);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for increment
operand immI1() %{
  predicate(n->get_int() == 1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for decrement
operand immI_M1() %{
  predicate(n->get_int() == -1);
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Valid scale values for addressing modes (0..3, the SIB-byte scale field)
operand immI2() %{
  predicate(0 <= n->get_int() && (n->get_int() <= 3));
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits in a sign-extended 8-bit immediate
operand immI8() %{
  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
  match(ConI);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Integer constant that fits in a sign-extended 16-bit immediate
operand immI16() %{
  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
  match(ConI);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// Constant for long shifts
operand immI_32() %{
  predicate( n->get_int() == 32 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count in the low word (1..31)
operand immI_1_31() %{
  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

// Shift count that reaches into the high word (32..63)
operand immI_32_63() %{
  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
  match(ConI);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_1() %{
  predicate( n->get_int() == 1 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_2() %{
  predicate( n->get_int() == 2 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}

operand immI_3() %{
  predicate( n->get_int() == 3 );
  match(ConI);

  op_cost(0);
  format %{ %}
  interface(CONST_INTER);
%}
4780
// Pointer Immediate: any pointer constant
operand immP() %{
  match(ConP);

  op_cost(10);
  format %{ %}
  interface(CONST_INTER);
%}

// NULL Pointer Immediate
operand immP0() %{
  predicate( n->get_ptr() == 0 );
  match(ConP);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}
4799
// Long Immediate: any 64-bit integer constant
operand immL() %{
  match(ConL);

  op_cost(20);
  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate zero
operand immL0() %{
  predicate( n->get_long() == 0L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: -1 (all bits set)
operand immL_M1() %{
  predicate( n->get_long() == -1L );
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long immediate from 0 to 127.
// Used for a shorter form of long mul by 10.
operand immL_127() %{
  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: low 32-bit mask
operand immL_32bits() %{
  predicate(n->get_long() == 0xFFFFFFFFL);
  match(ConL);
  op_cost(0);

  format %{ %}
  interface(CONST_INTER);
%}

// Long Immediate: fits in a signed 32-bit value (sign-extends to the same long)
operand immL32() %{
  predicate(n->get_long() == (int)(n->get_long()));
  match(ConL);
  op_cost(20);

  format %{ %}
  interface(CONST_INTER);
%}
4859
// Double Immediate zero (FPU-stack mode only)
operand immD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0
  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate 1.0 (FPU-stack mode only)
operand immD1() %{
  predicate( UseSSE<=1 && n->getd() == 1.0 );
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (FPU-stack mode only)
operand immD() %{
  predicate(UseSSE<=1);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate (SSE2 mode: double values live in XMM registers)
operand immXD() %{
  predicate(UseSSE>=2);
  match(ConD);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Double Immediate zero (SSE2 mode); bit-pattern compare so -0.0 is excluded
operand immXD0() %{
  // Do additional (and counter-intuitive) test against NaN to work around VC++
  // bug that generates code such that NaNs compare equal to 0.0 AND do not
  // compare equal to -0.0.
  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
  match(ConD);

  format %{ %}
  interface(CONST_INTER);
%}
4912
// Float Immediate zero (FPU-stack mode only)
operand immF0() %{
  predicate( UseSSE == 0 && n->getf() == 0.0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (FPU-stack mode only)
operand immF() %{
  predicate( UseSSE == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate (SSE mode: float values live in XMM registers)
operand immXF() %{
  predicate(UseSSE >= 1);
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}

// Float Immediate zero.  Zero and not -0.0 (bit-pattern compare)
operand immXF0() %{
  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
  match(ConF);

  op_cost(5);
  format %{ %}
  interface(CONST_INTER);
%}
4952
// Immediates for special shifts (sign extend)

// Constants for increment
operand immI_16() %{
  predicate( n->get_int() == 16 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

operand immI_24() %{
  predicate( n->get_int() == 24 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}

// Constant for byte-wide masking
operand immI_255() %{
  predicate( n->get_int() == 255 );
  match(ConI);

  format %{ %}
  interface(CONST_INTER);
%}
4980
// Register Operands
// Integer Register: any allocatable general-purpose register
operand eRegI() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegI);
  match(xRegI);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eDIRegI);
  match(eSIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Subset of Integer Register (x_reg class: the byte-addressable registers)
operand xRegI(eRegI reg) %{
  constraint(ALLOC_IN_RC(x_reg));
  match(reg);
  match(eAXRegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eDXRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
operand eAXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  match(eRegI);

  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Special Registers
operand eBXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  match(eRegI);

  format %{ "EBX" %}
  interface(REG_INTER);
%}

operand eCXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  match(eRegI);

  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eDXRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edx_reg));
  match(reg);
  match(eRegI);

  format %{ "EDX" %}
  interface(REG_INTER);
%}

operand eDIRegI(xRegI reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  match(eRegI);

  format %{ "EDI" %}
  interface(REG_INTER);
%}

// Integer register restricted to the nax_reg class
// (per the match list, excludes EAX and EBX)
operand naxRegI() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegI);
  match(eCXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register restricted to the nadx_reg class
// (per the match list, excludes EAX and EDX)
operand nadxRegI() %{
  constraint(ALLOC_IN_RC(nadx_reg));
  match(RegI);
  match(eBXRegI);
  match(eCXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// Integer register restricted to the ncx_reg class
// (per the match list, excludes ECX and EBX)
operand ncxRegI() %{
  constraint(ALLOC_IN_RC(ncx_reg));
  match(RegI);
  match(eAXRegI);
  match(eDXRegI);
  match(eSIRegI);
  match(eDIRegI);

  format %{ %}
  interface(REG_INTER);
%}

// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
// //
operand eSIRegI(xRegI reg) %{
   constraint(ALLOC_IN_RC(esi_reg));
   match(reg);
   match(eRegI);

   format %{ "ESI" %}
   interface(REG_INTER);
%}
5104
// Pointer Register: any register at all (any_reg class)
operand anyRegP() %{
  constraint(ALLOC_IN_RC(any_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);
  match(eRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer in any allocatable general-purpose register
operand eRegP() %{
  constraint(ALLOC_IN_RC(e_reg));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// On windows95, EBP is not safe to use for implicit null tests.
operand eRegP_no_EBP() %{
  constraint(ALLOC_IN_RC(e_reg_no_rbp));
  match(RegP);
  match(eAXRegP);
  match(eBXRegP);
  match(eCXRegP);
  match(eDIRegP);

  op_cost(100);
  format %{ %}
  interface(REG_INTER);
%}

// Pointer register restricted to the nax_reg class (not EAX, per the match list)
operand naxRegP() %{
  constraint(ALLOC_IN_RC(nax_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eCXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register restricted to the nabx_reg class (not EAX/EBX, per the match list)
operand nabxRegP() %{
  constraint(ALLOC_IN_RC(nabx_reg));
  match(RegP);
  match(eCXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Pointer register from the p_reg class
operand pRegP() %{
  constraint(ALLOC_IN_RC(p_reg));
  match(RegP);
  match(eBXRegP);
  match(eDXRegP);
  match(eSIRegP);
  match(eDIRegP);

  format %{ %}
  interface(REG_INTER);
%}

// Special Registers
// Return a pointer value
operand eAXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(eax_reg));
  match(reg);
  format %{ "EAX" %}
  interface(REG_INTER);
%}

// Used in AtomicAdd
operand eBXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ebx_reg));
  match(reg);
  format %{ "EBX" %}
  interface(REG_INTER);
%}

// Tail-call (interprocedural jump) to interpreter
operand eCXRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(ecx_reg));
  match(reg);
  format %{ "ECX" %}
  interface(REG_INTER);
%}

operand eSIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);
  format %{ "ESI" %}
  interface(REG_INTER);
%}

// Used in rep stosw
operand eDIRegP(eRegP reg) %{
  constraint(ALLOC_IN_RC(edi_reg));
  match(reg);
  format %{ "EDI" %}
  interface(REG_INTER);
%}

// EBP: the interpreter's frame pointer (see interpreter_frame_pointer above)
operand eBPRegP() %{
  constraint(ALLOC_IN_RC(ebp_reg));
  match(RegP);
  format %{ "EBP" %}
  interface(REG_INTER);
%}
5228
// Long value in a pair of 32-bit registers
operand eRegL() %{
  constraint(ALLOC_IN_RC(long_reg));
  match(RegL);
  match(eADXRegL);

  format %{ %}
  interface(REG_INTER);
%}

// Long pinned to EDX:EAX (required by mul/div/cmpxchg8b style instructions)
operand eADXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(reg);

  format %{ "EDX:EAX" %}
  interface(REG_INTER);
%}

// Long pinned to EBX:ECX
operand eBCXRegL( eRegL reg ) %{
  constraint(ALLOC_IN_RC(ebcx_reg));
  match(reg);

  format %{ "EBX:ECX" %}
  interface(REG_INTER);
%}

// Special case for integer high multiply
operand eADXRegL_low_only() %{
  constraint(ALLOC_IN_RC(eadx_reg));
  match(RegL);

  format %{ "EAX" %}
  interface(REG_INTER);
%}
5262
// Flags register, used as output of compare instructions
operand eFlagsReg() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS" %}
  interface(REG_INTER);
%}

// Flags register, used as output of FLOATING POINT compare instructions
operand eFlagsRegU() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);

  format %{ "EFLAGS_U" %}
  interface(REG_INTER);
%}

// Unsigned-compare flags where the carry flag already encodes the unordered
// case; predicate(false) so this operand is only produced by explicit rules
operand eFlagsRegUCF() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  predicate(false);

  format %{ "EFLAGS_U_CF" %}
  interface(REG_INTER);
%}

// Condition Code Register used by long compare
operand flagsReg_long_LTGE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LTGE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_EQNE() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_EQNE" %}
  interface(REG_INTER);
%}
operand flagsReg_long_LEGT() %{
  constraint(ALLOC_IN_RC(int_flags));
  match(RegFlags);
  format %{ "FLAGS_LEGT" %}
  interface(REG_INTER);
%}
5309
// Float register operands
// Double in an FPU stack register (pre-SSE2 mode only)
operand regD() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg));
  match(RegD);
  match(regDPR1);
  match(regDPR2);
  format %{ %}
  interface(REG_INTER);
%}

// Double pinned to the top-of-stack FPU register
operand regDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

operand regDPR2(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_reg1));
  match(reg);
  format %{ "FPR2" %}
  interface(REG_INTER);
%}

// Double in any FPU register except FPR1
operand regnotDPR1(regD reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(dbl_notreg0));
  match(reg);
  format %{ %}
  interface(REG_INTER);
%}

// XMM Double register operands
operand regXD() %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg));
  match(RegD);
  match(regXD6);
  match(regXD7);
  format %{ %}
  interface(REG_INTER);
%}

// XMM6 double register operands
operand regXD6(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg6));
  match(reg);
  format %{ "XMM6" %}
  interface(REG_INTER);
%}

// XMM7 double register operands
operand regXD7(regXD reg) %{
  predicate( UseSSE>=2 );
  constraint(ALLOC_IN_RC(xdb_reg7));
  match(reg);
  format %{ "XMM7" %}
  interface(REG_INTER);
%}

// Float register operands (FPU stack, pre-SSE mode)
operand regF() %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg));
  match(RegF);
  match(regFPR1);
  format %{ %}
  interface(REG_INTER);
%}

// Float pinned to the top-of-stack FPU register
operand regFPR1(regF reg) %{
  predicate( UseSSE < 2 );
  constraint(ALLOC_IN_RC(flt_reg0));
  match(reg);
  format %{ "FPR1" %}
  interface(REG_INTER);
%}

// XMM register operands (float in XMM, SSE1+ mode)
operand regX() %{
  predicate( UseSSE>=1 );
  constraint(ALLOC_IN_RC(xmm_reg));
  match(RegF);
  format %{ %}
  interface(REG_INTER);
%}
5401
5402
//----------Memory Operands----------------------------------------------------
// In the MEMORY_INTER encodings below, base(0xFFFFFFFF) means "no base
// register" and index(0x4) means "no index register" (0x4 is ESP, which
// cannot be an index in the SIB byte).

// Direct Memory Operand: absolute address from a pointer constant
operand direct(immP addr) %{
  match(addr);

  format %{ "[$addr]" %}
  interface(MEMORY_INTER) %{
    base(0xFFFFFFFF);
    index(0x4);
    scale(0x0);
    disp($addr);
  %}
%}

// Indirect Memory Operand
operand indirect(eRegP reg) %{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8(eRegP reg, immI8 off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32(eRegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
// (pointer-constant base added to an integer register)
operand indOffset32X(eRegI reg, immP off) %{
  match(AddP off reg);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset(eRegP reg, eRegI ireg, immI off) %{
  match(AddP (AddP reg ireg) off);

  op_cost(10);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Operand
operand indIndex(eRegP reg, eRegI ireg) %{
  match(AddP reg ireg);

  op_cost(10);
  format %{"[$reg + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp(0x0);
  %}
%}
5497
5498// // -------------------------------------------------------------------------
5499// // 486 architecture doesn't support "scale * index + offset" with out a base
5500// // -------------------------------------------------------------------------
5501// // Scaled Memory Operands
5502// // Indirect Memory Times Scale Plus Offset Operand
5503// operand indScaleOffset(immP off, eRegI ireg, immI2 scale) %{
5504//   match(AddP off (LShiftI ireg scale));
5505//
5506//   op_cost(10);
5507//   format %{"[$off + $ireg << $scale]" %}
5508//   interface(MEMORY_INTER) %{
5509//     base(0x4);
5510//     index($ireg);
5511//     scale($scale);
5512//     disp($off);
5513//   %}
5514// %}
5515
5516// Indirect Memory Times Scale Plus Index Register
5517operand indIndexScale(eRegP reg, eRegI ireg, immI2 scale) %{
5518  match(AddP reg (LShiftI ireg scale));
5519
5520  op_cost(10);
5521  format %{"[$reg + $ireg << $scale]" %}
5522  interface(MEMORY_INTER) %{
5523    base($reg);
5524    index($ireg);
5525    scale($scale);
5526    disp(0x0);
5527  %}
5528%}
5529
5530// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
5531operand indIndexScaleOffset(eRegP reg, immI off, eRegI ireg, immI2 scale) %{
5532  match(AddP (AddP reg (LShiftI ireg scale)) off);
5533
5534  op_cost(10);
5535  format %{"[$reg + $off + $ireg << $scale]" %}
5536  interface(MEMORY_INTER) %{
5537    base($reg);
5538    index($ireg);
5539    scale($scale);
5540    disp($off);
5541  %}
5542%}
5543
//----------Load Long Memory Operands------------------------------------------
// The load-long idiom will use its address expression again after loading
// the first word of the long.  If the load-long destination overlaps with
// registers used in the addressing expression, the 2nd half will be loaded
// from a clobbered address.  Fix this by requiring that load-long use
// address registers that do not overlap with the load-long target.

// load-long support: address pinned to ESI, which is never part of a
// long register pair, so it cannot overlap the destination
operand load_long_RegP() %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(RegP);
  match(eSIRegP);
  op_cost(100);
  format %{  %}
  interface(REG_INTER);
%}

// Indirect Memory Operand Long
operand load_long_indirect(load_long_RegP reg) %{
  constraint(ALLOC_IN_RC(esi_reg));
  match(reg);

  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand load_long_indOffset32(load_long_RegP reg, immI off) %{
  match(AddP reg off);

  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

opclass load_long_memory(load_long_indirect, load_long_indOffset32);
5589
5590
//----------Special Memory Operands--------------------------------------------
// Stack Slot Operand - This operand is used for loading and storing temporary
//                      values on the stack where a match requires a value to
//                      flow through memory.
operand stackSlotP(sRegP reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotI(sRegI reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotF(sRegF reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotD(sRegD reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}

operand stackSlotL(sRegL reg) %{
  constraint(ALLOC_IN_RC(stack_slots));
  // No match rule because this operand is only generated in matching
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base(0x4);   // ESP
    index(0x4);  // No Index
    scale(0x0);  // No Scale
    disp($reg);  // Stack Offset
  %}
%}
5654
//----------Memory Operands - Win95 Implicit Null Variants----------------
// Same shapes as the memory operands above, but the base register class
// excludes EBP (eRegP_no_EBP) and the op_cost is raised so these forms
// are only chosen when an implicit null check requires them.

// Indirect Memory Operand
operand indirect_win95_safe(eRegP_no_EBP reg)
%{
  constraint(ALLOC_IN_RC(e_reg));
  match(reg);

  op_cost(100);
  format %{ "[$reg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp(0x0);
  %}
%}

// Indirect Memory Plus Short Offset Operand
operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Long Offset Operand
operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
%{
  match(AddP reg off);

  op_cost(100);
  format %{ "[$reg + $off]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index(0x4);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Plus Index Register Plus Offset Operand
operand indIndexOffset_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI off)
%{
  match(AddP (AddP reg ireg) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale(0x0);
    disp($off);
  %}
%}

// Indirect Memory Times Scale Plus Index Register
operand indIndexScale_win95_safe(eRegP_no_EBP reg, eRegI ireg, immI2 scale)
%{
  match(AddP reg (LShiftI ireg scale));

  op_cost(100);
  format %{"[$reg + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp(0x0);
  %}
%}

// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, eRegI ireg, immI2 scale)
%{
  match(AddP (AddP reg (LShiftI ireg scale)) off);

  op_cost(100);
  format %{"[$reg + $off + $ireg << $scale]" %}
  interface(MEMORY_INTER) %{
    base($reg);
    index($ireg);
    scale($scale);
    disp($off);
  %}
%}
5746
5747//----------Conditional Branch Operands----------------------------------------
5748// Comparison Op  - This is the operation of the comparison, and is limited to
5749//                  the following set of codes:
5750//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
5751//
5752// Other attributes of the comparison, such as unsignedness, are specified
5753// by the comparison instruction that sets a condition code flags register.
5754// That result is represented by a flags operand whose subtype is appropriate
5755// to the unsignedness (etc.) of the comparison.
5756//
5757// Later, the instruction which matches both the Comparison Op (a Bool) and
5758// the flags (produced by the Cmp) specifies the coding of the comparison op
5759// by matching a specific subtype of Bool operand below, such as cmpOpU.
5760
5761// Comparision Code
5762operand cmpOp() %{
5763  match(Bool);
5764
5765  format %{ "" %}
5766  interface(COND_INTER) %{
5767    equal(0x4, "e");
5768    not_equal(0x5, "ne");
5769    less(0xC, "l");
5770    greater_equal(0xD, "ge");
5771    less_equal(0xE, "le");
5772    greater(0xF, "g");
5773  %}
5774%}
5775
5776// Comparison Code, unsigned compare.  Used by FP also, with
5777// C2 (unordered) turned into GT or LT already.  The other bits
5778// C0 and C3 are turned into Carry & Zero flags.
5779operand cmpOpU() %{
5780  match(Bool);
5781
5782  format %{ "" %}
5783  interface(COND_INTER) %{
5784    equal(0x4, "e");
5785    not_equal(0x5, "ne");
5786    less(0x2, "b");
5787    greater_equal(0x3, "nb");
5788    less_equal(0x6, "be");
5789    greater(0x7, "nbe");
5790  %}
5791%}
5792
// Floating comparisons that don't require any fixup for the unordered case
operand cmpOpUCF() %{
  match(Bool);
  // Only the four ordering relations qualify; eq/ne are handled by
  // cmpOpUCF2, which allows extra fixup jumps.
  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
            n->as_Bool()->_test._test == BoolTest::ge ||
            n->as_Bool()->_test._test == BoolTest::le ||
            n->as_Bool()->_test._test == BoolTest::gt);
  format %{ "" %}
  // Same unsigned condition codes as cmpOpU.
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5810
5811
// Floating comparisons that can be fixed up with extra conditional jumps
// (only equality tests, where the unordered result needs special handling).
operand cmpOpUCF2() %{
  match(Bool);
  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
            n->as_Bool()->_test._test == BoolTest::eq);
  format %{ "" %}
  // Same unsigned condition codes as cmpOpU.
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0x2, "b");
    greater_equal(0x3, "nb");
    less_equal(0x6, "be");
    greater(0x7, "nbe");
  %}
%}
5827
// Comparison Code for FP conditional move
operand cmpOp_fcmov() %{
  match(Bool);

  format %{ "" %}
  // These values are wider than the 4-bit condition nibbles above and
  // look like FCMOVcc opcode-byte encodings -- NOTE(review): confirm
  // against the fcmov encode classes that consume this operand.
  interface(COND_INTER) %{
    equal        (0x0C8);
    not_equal    (0x1C8);
    less         (0x0C0);
    greater_equal(0x1C0);
    less_equal   (0x0D0);
    greater      (0x1D0);
  %}
%}
5842
// Comparison Code used in long compares
operand cmpOp_commute() %{
  match(Bool);

  format %{ "" %}
  // Encodings are deliberately mirrored relative to cmpOp: "less" uses
  // the greater code (0xF/"g"), etc., for use when the comparison's
  // operands have been commuted.
  interface(COND_INTER) %{
    equal(0x4, "e");
    not_equal(0x5, "ne");
    less(0xF, "g");
    greater_equal(0xE, "le");
    less_equal(0xD, "ge");
    greater(0xC, "l");
  %}
%}
5857
5858//----------OPERAND CLASSES----------------------------------------------------
// Operand Classes are groups of operands that are used to simplify
5860// instruction definitions by not requiring the AD writer to specify separate
5861// instructions for every form of operand when the instruction accepts
5862// multiple operand types with the same basic encoding and format.  The classic
5863// case of this is memory operands.
5864
// General memory operand: any addressing mode.
opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
               indIndex, indIndexScale, indIndexScaleOffset);

// Long memory operations are encoded in 2 instructions and a +4 offset.
// This means some kind of offset is always required and you cannot use
// an oop as the offset (done when working on static globals).
// Same list as 'memory' minus indOffset32X.
opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
                    indIndex, indIndexScale, indIndexScaleOffset);
5873
5874
5875//----------PIPELINE-----------------------------------------------------------
5876// Rules which define the behavior of the target architectures pipeline.
5877pipeline %{
5878
//----------ATTRIBUTES---------------------------------------------------------
attributes %{
  variable_size_instructions;        // Variable size instructions
  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
  instruction_unit_size = 1;         // An instruction is 1 byte long
  instruction_fetch_unit_size = 16;  // The processor fetches one line
  instruction_fetch_units = 1;       // of 16 bytes

  // List of nop instructions
  nops( MachNop );
%}
5890
//----------RESOURCES----------------------------------------------------------
// Resources are the functional units available to the machine

// Generic P2/P3 pipeline
// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
// 3 instructions decoded per cycle.
// 2 load/store ops per cycle, 1 branch, 1 FPU,
// 2 ALU op, only ALU0 handles mul/div instructions.
resources( D0, D1, D2, DECODE = D0 | D1 | D2,
           MS0, MS1, MEM = MS0 | MS1,
           BR, FPU,
           ALU0, ALU1, ALU = ALU0 | ALU1 );

//----------PIPELINE DESCRIPTION-----------------------------------------------
// Pipeline Description specifies the stages in the machine's pipeline

// Generic P2/P3 pipeline
// Six generic stages S0..S5, referenced by the pipe_class rules below.
pipe_desc(S0, S1, S2, S3, S4, S5);
5909
5910//----------PIPELINE CLASSES---------------------------------------------------
5911// Pipeline Classes describe the stages in which input and output are
5912// referenced by the hardware pipeline.
5913
5914// Naming convention: ialu or fpu
5915// Then: _reg
5916// Then: _reg if there is a 2nd register
5917// Then: _long if it's a pair of instructions implementing a long
5918// Then: _fat if it requires the big decoder
5919//   Or: _mem if it requires the big decoder and a memory unit.
5920
// Integer ALU reg operation
pipe_class ialu_reg(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg operation
pipe_class ialu_reg_long(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg operation using big decoder
pipe_class ialu_reg_fat(eRegI dst) %{
    single_instruction;
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg operation using big decoder
pipe_class ialu_reg_long_fat(eRegL dst) %{
    instruction_count(2);
    dst    : S4(write);
    dst    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // any 2 alus
%}

// Integer ALU reg-reg operation
pipe_class ialu_reg_reg(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-reg operation needing the big decoder
// NOTE(review): the second operand is declared 'memory src' despite the
// reg-reg name -- confirm against the instructions using this class.
pipe_class ialu_reg_reg_fat(eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S3;        // any alu
%}

// Long ALU reg-reg operation
pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S3(2);     // both alus
%}

// Integer ALU reg-mem operation
pipe_class ialu_reg_mem(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;        // any mem
%}

// Long ALU reg-mem operation
// NOTE(review): uses opclass load_long_memory, which is not defined in
// the opclass section above -- presumably declared elsewhere in the file.
pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer mem operation (prefetch)
pipe_class ialu_mem(memory mem)
%{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    MEM    : S3;        // any mem
%}

// Integer Store to Memory
pipe_class ialu_mem_reg(memory mem, eRegI src) %{
    single_instruction;
    mem    : S3(read);
    src    : S5(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}

// Long Store to Memory
pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
    instruction_count(2);
    mem    : S3(read);
    src    : S5(read);
    D0     : S0(2);     // big decoder only; twice
    ALU    : S4(2);     // any 2 alus
    MEM    : S3(2);     // both mems
%}

// Integer Store of an immediate to Memory
pipe_class ialu_mem_imm(memory mem) %{
    single_instruction;
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
6050
// Integer ALU0 reg-reg operation (mul/div class ops restricted to ALU0)
pipe_class ialu_reg_reg_alu0(eRegI dst, eRegI src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    D0     : S0;        // Big decoder only
    ALU0   : S3;        // only alu0
%}

// Integer ALU0 reg-mem operation
pipe_class ialu_reg_mem_alu0(eRegI dst, memory mem) %{
    single_instruction;
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    ALU0   : S4;        // ALU0 only
    MEM    : S3;        // any mem
%}

// Integer ALU reg-reg operation producing only flags (compare/test)
pipe_class ialu_cr_reg_reg(eFlagsReg cr, eRegI src1, eRegI src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-imm operation producing only flags
pipe_class ialu_cr_reg_imm(eFlagsReg cr, eRegI src1) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    DECODE : S0;        // any decoder
    ALU    : S3;        // any alu
%}

// Integer ALU reg-mem operation producing only flags
pipe_class ialu_cr_reg_mem(eFlagsReg cr, eRegI src1, memory src2) %{
    single_instruction;
    cr     : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    D0     : S0;        // big decoder only
    ALU    : S4;        // any alu
    MEM    : S3;
%}
6099
// Conditional move reg-reg (expanded CMOV-less idiom; 4 instructions)
pipe_class pipe_cmplt( eRegI p, eRegI q, eRegI y ) %{
    instruction_count(4);
    y      : S4(read);
    q      : S3(read);
    p      : S3(read);
    DECODE : S0(4);     // any 4 decoder slots
%}

// Conditional move reg-reg
pipe_class pipe_cmov_reg( eRegI dst, eRegI src, eFlagsReg cr ) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}

// Conditional move reg-mem
pipe_class pipe_cmov_mem( eFlagsReg cr, eRegI dst, memory src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
    MEM    : S3;
%}

// Conditional move reg-reg long
pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0(2);     // any 2 decoders
%}

// Conditional move double reg-reg
pipe_class pipe_cmovD_reg( eFlagsReg cr, regDPR1 dst, regD src) %{
    single_instruction;
    dst    : S4(write);
    src    : S3(read);
    cr     : S3(read);
    DECODE : S0;        // any decoder
%}
6145
// Float reg operation
pipe_class fpu_reg(regD dst) %{
    instruction_count(2);
    dst    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg operation
pipe_class fpu_reg_reg(regD dst, regD src) %{
    instruction_count(2);
    dst    : S4(write);
    src    : S3(read);
    DECODE : S0(2);     // any 2 decoders
    FPU    : S3;
%}

// Float reg-reg-reg operation
pipe_class fpu_reg_reg_reg(regD dst, regD src1, regD src2) %{
    instruction_count(3);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    DECODE : S0(3);     // any 3 decoders
    FPU    : S3(2);
%}

// Float reg-reg-reg-reg operation
pipe_class fpu_reg_reg_reg_reg(regD dst, regD src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S0(4);     // any 4 decoders
    FPU    : S3(2);
%}

// Float reg-mem-reg-reg operation
pipe_class fpu_reg_mem_reg_reg(regD dst, memory src1, regD src2, regD src3) %{
    instruction_count(4);
    dst    : S4(write);
    src1   : S3(read);
    src2   : S3(read);
    src3   : S3(read);
    DECODE : S1(3);     // any 3 decoders
    D0     : S0;        // Big decoder only
    FPU    : S3(2);
    MEM    : S3;
%}

// Float reg-mem operation
pipe_class fpu_reg_mem(regD dst, memory mem) %{
    instruction_count(2);
    dst    : S5(write);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float reg-reg-mem operation
pipe_class fpu_reg_reg_mem(regD dst, regD src1, memory mem) %{
    instruction_count(3);
    dst    : S5(write);
    src1   : S3(read);
    mem    : S3(read);
    D0     : S0;        // big decoder only
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg operation
pipe_class fpu_mem_reg(memory mem, regD src) %{
    instruction_count(2);
    src    : S5(read);
    mem    : S3(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-reg operation
pipe_class fpu_mem_reg_reg(memory mem, regD src1, regD src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S3(read);
    DECODE : S0(2);     // any decoder for FPU PUSH
    D0     : S1;        // big decoder only
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float mem-reg-mem operation
pipe_class fpu_mem_reg_mem(memory mem, regD src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float mem-mem operation; note that no FPU stage is booked here.
pipe_class fpu_mem_mem(memory dst, memory src1) %{
    instruction_count(2);
    src1   : S3(read);
    dst    : S4(read);
    D0     : S0(2);     // big decoder only
    MEM    : S3(2);     // any mem
%}

// Float mem-mem-mem operation
pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
    instruction_count(3);
    src1   : S3(read);
    src2   : S3(read);
    dst    : S4(read);
    D0     : S0(3);     // big decoder only
    FPU    : S4;
    MEM    : S3(3);     // any mem
%}

// Float mem-reg-constant operation
pipe_class fpu_mem_reg_con(memory mem, regD src1) %{
    instruction_count(3);
    src1   : S4(read);
    mem    : S4(read);
    DECODE : S0;        // any decoder for FPU PUSH
    D0     : S0(2);     // big decoder only
    FPU    : S4;
    MEM    : S3(2);     // any mem
%}

// Float load constant
pipe_class fpu_reg_con(regD dst) %{
    instruction_count(2);
    dst    : S5(write);
    D0     : S0;        // big decoder only for the load
    DECODE : S1;        // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}

// Float load constant plus register source
pipe_class fpu_reg_reg_con(regD dst, regD src) %{
    instruction_count(3);
    dst    : S5(write);
    src    : S3(read);
    D0     : S0;        // big decoder only for the load
    DECODE : S1(2);     // any decoder for FPU POP
    FPU    : S4;
    MEM    : S3;        // any mem
%}
6301
// UnConditional branch
pipe_class pipe_jmp( label labl ) %{
    single_instruction;
    BR   : S3;
%}

// Conditional branch
pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
    single_instruction;
    cr    : S1(read);
    BR    : S3;
%}

// Allocation idiom (compare-and-exchange; serializes the pipeline)
pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
    instruction_count(1); force_serialization;
    fixed_latency(6);
    heap_ptr : S3(read);
    DECODE   : S0(3);
    D0       : S2;
    MEM      : S3;
    ALU      : S3(2);
    dst      : S5(write);
    BR       : S5;
%}

// Generic big/slow expanded idiom
pipe_class pipe_slow(  ) %{
    instruction_count(10); multiple_bundles; force_serialization;
    fixed_latency(100);
    D0  : S0(2);
    MEM : S3(2);
%}

// The real do-nothing guy: consumes no resources, emits nothing.
pipe_class empty( ) %{
    instruction_count(0);
%}

// Define the class for the Nop node
define %{
   MachNop = empty;
%}
6345
6346%}
6347
6348//----------INSTRUCTIONS-------------------------------------------------------
6349//
6350// match      -- States which machine-independent subtree may be replaced
6351//               by this instruction.
6352// ins_cost   -- The estimated cost of this instruction is used by instruction
6353//               selection to identify a minimum cost tree of machine
6354//               instructions that matches a tree of machine-independent
6355//               instructions.
6356// format     -- A string providing the disassembly for this instruction.
6357//               The value of an instruction's operand may be inserted
6358//               by referring to it with a '$' prefix.
6359// opcode     -- Three instruction opcodes may be provided.  These are referred
6360//               to within an encode class as $primary, $secondary, and $tertiary
6361//               respectively.  The primary opcode is commonly used to
6362//               indicate the type of machine instruction, while secondary
6363//               and tertiary are often used for prefix options or addressing
6364//               modes.
6365// ins_encode -- A list of encode classes with parameters. The encode class
6366//               name must have been defined in an 'enc_class' specification
6367//               in the encode section of the architecture description.
6368
//----------BSWAP-Instruction--------------------------------------------------
// Reverse byte order of a 32-bit int, in place (dst is both source and
// result) via the BSWAP instruction.
instruct bytes_reverse_int(eRegI dst) %{
  match(Set dst (ReverseBytesI dst));

  format %{ "BSWAP  $dst" %}
  opcode(0x0F, 0xC8);
  ins_encode( OpcP, OpcSReg(dst) );
  ins_pipe( ialu_reg );
%}

// Reverse byte order of a 64-bit long: byte-swap each 32-bit half, then
// exchange the halves.
instruct bytes_reverse_long(eRegL dst) %{
  match(Set dst (ReverseBytesL dst));

  format %{ "BSWAP  $dst.lo\n\t"
            "BSWAP  $dst.hi\n\t"
            "XCHG   $dst.lo $dst.hi" %}

  ins_cost(125);
  ins_encode( bswap_long_bytes(dst) );
  ins_pipe( ialu_reg_reg);
%}
6390
6391
//----------Load/Store/Move Instructions---------------------------------------
//----------Load Instructions--------------------------------------------------
// Load Byte (8bit signed), sign-extending into a 32-bit register.
instruct loadB(xRegI dst, memory mem) %{
  match(Set dst (LoadB mem));

  ins_cost(125);
  format %{ "MOVSX8 $dst,$mem" %}
  opcode(0xBE, 0x0F);
  ins_encode( OpcS, OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Byte (8bit UNsigned)
// Matched as a signed byte load ANDed with 0xFF, i.e. a zero-extending
// byte load (MOVZX8).
instruct loadUB(xRegI dst, memory mem, immI_255 bytemask) %{
  match(Set dst (AndI (LoadB mem) bytemask));

  ins_cost(125);
  format %{ "MOVZX8 $dst,$mem" %}
  opcode(0xB6, 0x0F);
  ins_encode( OpcS, OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Unsigned Short/Char (16bit unsigned), zero-extending.
instruct loadUS(eRegI dst, memory mem) %{
  match(Set dst (LoadUS mem));

  ins_cost(125);
  format %{ "MOVZX  $dst,$mem" %}
  opcode(0xB7, 0x0F);
  ins_encode( OpcS, OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Integer
instruct loadI(eRegI dst, memory mem) %{
  match(Set dst (LoadI mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6437
// Load Long.  Cannot clobber address while loading, so restrict address
// register to ESI
instruct loadL(eRegL dst, load_long_memory mem) %{
  // Non-atomic two-MOV form; atomic (volatile) loads are handled by the
  // variants below.
  predicate(!((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(250);
  format %{ "MOV    $dst.lo,$mem\n\t"
            "MOV    $dst.hi,$mem+4" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,mem), OpcS, RegMem_Hi(dst,mem));
  ins_pipe( ialu_reg_long_mem );
%}

// Volatile Load Long.  Must be atomic, so do 64-bit FILD
// then store it down to the stack and reload on the int
// side.
instruct loadL_volatile(stackSlotL dst, memory mem) %{
  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}

// Atomic long load via SSE2 MOVSD; result is left in a stack slot.
instruct loadLX_volatile(stackSlotL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}

// Atomic long load via SSE2; result is split into an integer register
// pair with MOVD/PSRLQ/MOVD.
instruct loadLX_reg_volatile(eRegL dst, memory mem, regXD tmp) %{
  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
  match(Set dst (LoadL mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
6489
// Load Range (array length); plain 32-bit MOV.
instruct loadRange(eRegI dst, memory mem) %{
  match(Set dst (LoadRange mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}


// Load Pointer
instruct loadP(eRegP dst, memory mem) %{
  match(Set dst (LoadP mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Klass Pointer
instruct loadKlass(eRegP dst, memory mem) %{
  match(Set dst (LoadKlass mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}

// Load Short (16bit signed), sign-extending.
instruct loadS(eRegI dst, memory mem) %{
  match(Set dst (LoadS mem));

  ins_cost(125);
  format %{ "MOVSX  $dst,$mem" %}
  opcode(0xBF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
6534
// Load Double via x87 when SSE2 is not in use.
instruct loadD(regD dst, memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadD mem));

  ins_cost(150);
  format %{ "FLD_D  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Double to XMM
instruct loadXD(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// MOVLPD variant: writes only the low 64 bits of the XMM register
// (selected when clearing the upper half is not wanted).
instruct loadXD_partial(regXD dst, memory mem) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (LoadD mem));
  ins_cost(145);
  format %{ "MOVLPD $dst,$mem" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load to XMM register (single-precision floating point)
// MOVSS instruction
instruct loadX(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (LoadF mem));
  ins_cost(145);
  format %{ "MOVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Load Float via x87 when SSE is disabled.
instruct loadF(regF dst, memory mem) %{
  predicate(UseSSE==0);
  match(Set dst (LoadF mem));

  ins_cost(150);
  format %{ "FLD_S  ST,$mem\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}
6592
// Load Aligned Packed Byte to XMM register (64-bit MOVQ load)
instruct loadA8B(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load8B mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed8B" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Short to XMM register
instruct loadA4S(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4S mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4S" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Char to XMM register
instruct loadA4C(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load4C mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed4C" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Integer to XMM register
instruct load2IU(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2I mem));
  ins_cost(125);
  format %{ "MOVQ  $dst,$mem\t! packed2I" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}

// Load Aligned Packed Single to XMM
instruct loadA2F(regXD dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (Load2F mem));
  ins_cost(145);
  format %{ "MOVQ  $dst,$mem\t! packed2F" %}
  ins_encode( movq_ld(dst, mem));
  ins_pipe( pipe_slow );
%}
6642
// Load Effective Address: LEA computes the address of the operand into
// $dst without touching memory.  One variant per addressing mode.
instruct leaP8(eRegP dst, indOffset8 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaP32(eRegP dst, indOffset32 mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}

instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
  match(Set dst mem);

  ins_cost(110);
  format %{ "LEA    $dst,$mem" %}
  opcode(0x8D);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_reg_fat );
%}
6693
// Load Constant
instruct loadConI(eRegI dst, immI src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  ins_encode( LdImmI(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant zero
// XOR is shorter than MOV imm but clobbers the condition codes, hence
// the KILL of the flags register.
instruct loadConI0(eRegI dst, immI0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  ins_cost(50);
  format %{ "XOR    $dst,$dst" %}
  opcode(0x33);  /* + rd */
  ins_encode( OpcP, RegReg( dst, dst ) );
  ins_pipe( ialu_reg );
%}

// Load Constant pointer
instruct loadConP(eRegP dst, immP src) %{
  match(Set dst src);

  format %{ "MOV    $dst,$src" %}
  opcode(0xB8);  /* + rd */
  ins_encode( LdImmP(dst, src) );
  ins_pipe( ialu_reg_fat );
%}

// Load Constant long: one MOV per 32-bit half.
instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(200);
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "MOV    $dst.hi,$src.hi" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
  ins_pipe( ialu_reg_long_fat );
%}

// Load Constant long zero: XOR both halves (kills flags).
instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);
  ins_cost(150);
  format %{ "XOR    $dst.lo,$dst.lo\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
  ins_pipe( ialu_reg_long );
%}

// The instruction usage is guarded by predicate in operand immF().
instruct loadConF(regF dst, immF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  ST,$src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9, 0x00);       /* D9 /0 */
  ins_encode(LdImmF(src), Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXF().
instruct loadConX(regX dst, immXF con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSS  $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), LdImmX(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXF0().
instruct loadConX0(regX dst, immXF0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
  ins_encode( Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immD().
instruct loadConD(regD dst, immD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  ST,$src\n\t"
            "FSTP   $dst" %}
  ins_encode(LdImmD(src), Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_con );
%}

// The instruction usage is guarded by predicate in operand immXD().
instruct loadConXD(regXD dst, immXD con) %{
  match(Set dst con);
  ins_cost(125);
  format %{ "MOVSD  $dst,[$con]" %}
  ins_encode(load_conXD(dst, con));
  ins_pipe( pipe_slow );
%}

// The instruction usage is guarded by predicate in operand immXD0().
instruct loadConXD0(regXD dst, immXD0 src) %{
  match(Set dst src);
  ins_cost(100);
  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x57), RegReg(dst,dst));
  ins_pipe( pipe_slow );
%}
6804
// Load Stack Slot (int)
instruct loadSSI(eRegI dst, stackSlotI src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (long): two MOVs, one per half.
instruct loadSSL(eRegL dst, stackSlotL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Load Stack Slot (pointer)
instruct loadSSP(eRegP dst, stackSlotP src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "MOV    $dst,$src" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,src));
  ins_pipe( ialu_reg_mem );
%}

// Load Stack Slot (float) via x87 push/pop.
instruct loadSSF(regF dst, stackSlotF src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Load Stack Slot (double) via x87 push/pop.
instruct loadSSD(regD dst, stackSlotD src) %{
  match(Set dst src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}
6863
// Prefetch instructions.
// Must be safe to execute with invalid address (cannot fault).
//
// Variant selection: with UseSSE==0 only the 3DNow! forms (0F 0D) are
// possible; with SSE the SSE form 0F 18 is used, where the ModRM reg
// field picks the hint: /0 = PREFETCHNTA, /1 = PREFETCHT0, /3 = PREFETCHT2.
// The ReadPrefetchInstr / AllocatePrefetchInstr flags select the hint.

// No SSE and no 3DNow!: prefetch degenerates to nothing (size 0).
instruct prefetchr0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchRead mem);
  ins_cost(0);
  size(0);
  format %{ "PREFETCHR (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchr( memory mem ) %{
  // NOTE(review): '&&' binds tighter than '||', so this also matches when
  // ReadPrefetchInstr==3 regardless of UseSSE/3DNow! support — presumably
  // the flag is only set to 3 on 3DNow! hardware; confirm in VM_Version.
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || ReadPrefetchInstr==3);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHR $mem\t! Prefetch into level 1 cache for read" %}
  opcode(0x0F, 0x0d);     /* Opcode 0F 0d /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrNTA( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==0);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT0( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==1);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchrT2( memory mem ) %{
  predicate(UseSSE>=1 && ReadPrefetchInstr==2);
  match(PrefetchRead mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for read" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}

// Write-prefetch variants mirror the read variants above, keyed off
// AllocatePrefetchInstr instead of ReadPrefetchInstr.
instruct prefetchw0( memory mem ) %{
  predicate(UseSSE==0 && !VM_Version::supports_3dnow());
  match(PrefetchWrite mem);
  ins_cost(0);
  size(0);
  format %{ "Prefetch (non-SSE is empty encoding)" %}
  ins_encode();
  ins_pipe(empty);
%}

instruct prefetchw( memory mem ) %{
  // NOTE(review): same '&&'/'||' precedence caveat as prefetchr above.
  predicate(UseSSE==0 && VM_Version::supports_3dnow() || AllocatePrefetchInstr==3);
  match( PrefetchWrite mem );
  ins_cost(100);

  format %{ "PREFETCHW $mem\t! Prefetch into L1 cache and mark modified" %}
  opcode(0x0F, 0x0D);     /* Opcode 0F 0D /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwNTA( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHNTA $mem\t! Prefetch into non-temporal cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /0 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x00,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT0( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT0 $mem\t! Prefetch into L1 and L2 caches for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /1 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x01,mem));
  ins_pipe(ialu_mem);
%}

instruct prefetchwT2( memory mem ) %{
  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
  match(PrefetchWrite mem);
  ins_cost(100);

  format %{ "PREFETCHT2 $mem\t! Prefetch into L2 cache for write" %}
  opcode(0x0F, 0x18);     /* Opcode 0F 18 /3 */
  ins_encode(OpcP, OpcS, RMopc_Mem(0x03,mem));
  ins_pipe(ialu_mem);
%}
6974
//----------Store Instructions-------------------------------------------------

// Store Byte
// MOV r/m8,r8 (88).  src is xRegI rather than eRegI — presumably limits
// the source to byte-addressable registers (AL..DL); see the operand
// definition earlier in the file to confirm.
instruct storeB(memory mem, xRegI src) %{
  match(Set mem (StoreB mem src));

  ins_cost(125);
  format %{ "MOV8   $mem,$src" %}
  opcode(0x88);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Char/Short
// 16-bit store: the 0x66 operand-size prefix (secondary opcode, emitted
// first via OpcS) turns MOV r/m32,r32 (89) into a 16-bit move.
instruct storeC(memory mem, eRegI src) %{
  match(Set mem (StoreC mem src));

  ins_cost(125);
  format %{ "MOV16  $mem,$src" %}
  opcode(0x89, 0x66);
  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer
// Plain MOV r/m32,r32 (89).
instruct storeI(memory mem, eRegI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}
7009
// Store Long
// Non-atomic long store: two 32-bit MOVs (low half, then high half at
// mem+4).  Only used when the node does not require atomic access.
instruct storeL(long_memory mem, eRegL src) %{
  predicate(!((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));

  ins_cost(200);
  format %{ "MOV    $mem,$src.lo\n\t"
            "MOV    $mem+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
  ins_pipe( ialu_mem_long_reg );
%}

// Volatile Store Long.  Must be atomic, so move it into
// the FP TOS and then do a 64-bit FIST.  Has to probe the
// target address before the store (for null-ptr checks)
// so the memory operand is used twice in the encoding.
// The probe is a CMP mem,EAX (3B) whose result is ignored; it exists only
// to take the implicit null-check fault before the FILD/FISTP pair runs.
// x87 path, used when UseSSE<=1.  Kills flags (the CMP).
instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( KILL cr );
  ins_cost(400);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "FILD   $src\n\t"
            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
  ins_pipe( fpu_reg_mem );
%}

// SSE2 path: atomic 64-bit store via an XMM temp (MOVSD from the spill
// slot, MOVSD to memory).  Same EAX probe for the implicit null check.
instruct storeLX_volatile(memory mem, stackSlotL src, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp, KILL cr );
  ins_cost(380);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVSD  $tmp,$src\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_volatile(mem, src, tmp));
  ins_pipe( pipe_slow );
%}

// SSE2 path from a register pair: build the 64-bit value in an XMM reg
// (MOVD lo, MOVD hi, PUNPCKLDQ) and store it atomically with MOVSD.
// Cheapest of the three variants (cost 360) since it skips the spill slot.
instruct storeLX_reg_volatile(memory mem, eRegL src, regXD tmp2, regXD tmp, eFlagsReg cr) %{
  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
  match(Set mem (StoreL mem src));
  effect( TEMP tmp2 , TEMP tmp, KILL cr );
  ins_cost(360);
  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
            "MOVD   $tmp,$src.lo\n\t"
            "MOVD   $tmp2,$src.hi\n\t"
            "PUNPCKLDQ $tmp,$tmp2\n\t"
            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
  opcode(0x3B);
  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeLX_reg_volatile(mem, src, tmp, tmp2));
  ins_pipe( pipe_slow );
%}
7067
// Store Pointer; for storing unknown oops and raw pointers
// Same MOV r/m32,r32 (89) as storeI; separate rule for pointer typing.
instruct storeP(memory mem, anyRegP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(125);
  format %{ "MOV    $mem,$src" %}
  opcode(0x89);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer Immediate
// MOV r/m32,imm32: C7 /0 followed by the 32-bit immediate.
instruct storeImmI(memory mem, immI src) %{
  match(Set mem (StoreI mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Short/Char Immediate
// 16-bit immediate store: 0x66 size prefix + C7 /0 + imm16.  Guarded by
// UseStoreImmI16 (the flag gating use of this instruction form).
instruct storeImmI16(memory mem, immI16 src) %{
  predicate(UseStoreImmI16);
  match(Set mem (StoreC mem src));

  ins_cost(150);
  format %{ "MOV16  $mem,$src" %}
  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Pointer Immediate; null pointers or constant oops that do not
// need card-mark barriers.
// Same C7 /0 id encoding as storeImmI.
instruct storeImmP(memory mem, immP src) %{
  match(Set mem (StoreP mem src));

  ins_cost(150);
  format %{ "MOV    $mem,$src" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store Byte Immediate
// MOV r/m8,imm8: C6 /0 ib.
instruct storeImmB(memory mem, immI8 src) %{
  match(Set mem (StoreB mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7124
// Store Aligned Packed Byte XMM register to memory
// All three packed stores below are 64-bit MOVQ stores emitted by the
// shared movq_st encoder; they differ only in the ideal vector node type.
instruct storeA8B(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store8B mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed8B" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Char/Short XMM register to memory
instruct storeA4C(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store4C mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed4C" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Integer XMM register to memory
instruct storeA2I(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2I mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2I" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store CMS card-mark Immediate
// Card-table byte store (StoreCM node); encoding is identical to
// storeImmB (C6 /0 ib).
instruct storeImmCM(memory mem, immI8 src) %{
  match(Set mem (StoreCM mem src));

  ins_cost(150);
  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
  opcode(0xC6);               /* C6 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
  ins_pipe( ialu_mem_imm );
%}
7165
// Store Double
// x87 path (UseSSE<=1): FST m64real (DD /2) from the top-of-stack
// register (src is constrained to regDPR1 = ST(0)).
instruct storeD( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem src));

  ins_cost(100);
  format %{ "FST_D  $mem,$src" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store double does rounding on x86
// The RoundDouble is folded away: the 64-bit FST itself performs the
// rounding, so the encoding is identical to storeD above.
instruct storeD_rounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreD mem (RoundDouble src)));

  ins_cost(100);
  format %{ "FST_D  $mem,$src\t# round" %}
  opcode(0xDD);       /* DD /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store XMM register to memory (double-precision floating points)
// MOVSD instruction
// F2 0F 11 is MOVSD m64,xmm.
instruct storeXD(memory mem, regXD src) %{
  predicate(UseSSE>=2);
  match(Set mem (StoreD mem src));
  ins_cost(95);
  format %{ "MOVSD  $mem,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store XMM register to memory (single-precision floating point)
// MOVSS instruction
// F3 0F 11 is MOVSS m32,xmm.
instruct storeX(memory mem, regX src) %{
  predicate(UseSSE>=1);
  match(Set mem (StoreF mem src));
  ins_cost(95);
  format %{ "MOVSS  $mem,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, mem));
  ins_pipe( pipe_slow );
%}

// Store Aligned Packed Single Float XMM register to memory
// 64-bit MOVQ store, same shared encoder as the other packed stores.
instruct storeA2F(memory mem, regXD src) %{
  predicate(UseSSE>=1);
  match(Set mem (Store2F mem src));
  ins_cost(145);
  format %{ "MOVQ  $mem,$src\t! packed2F" %}
  ins_encode( movq_st(mem, src));
  ins_pipe( pipe_slow );
%}

// Store Float
// x87 path (UseSSE==0): FST m32real (D9 /2) from ST(0).
instruct storeF( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem src));

  ins_cost(100);
  format %{ "FST_S  $mem,$src" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// RoundFloat folds into the 32-bit FST, which rounds as it stores.
instruct storeF_rounded( memory mem, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set mem (StoreF mem (RoundFloat src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}

// Store Float does rounding on x86
// ConvD2F folds into the store: FST m32real narrows the double in ST(0)
// to single precision as part of the store itself.
instruct storeF_Drounded( memory mem, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set mem (StoreF mem (ConvD2F src)));

  ins_cost(100);
  format %{ "FST_S  $mem,$src\t# D-round" %}
  opcode(0xD9);       /* D9 /2 */
  ins_encode( enc_FP_store(mem,src) );
  ins_pipe( fpu_mem_reg );
%}
7257
// Store immediate Float value (it is faster than store from FPU register)
// The instruction usage is guarded by predicate in operand immF().
// The float's raw bit pattern is stored with an integer MOV m32,imm32
// (C7 /0), bypassing the FPU entirely.
instruct storeF_imm( memory mem, immF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}

// Store immediate Float value (it is faster than store from XMM register)
// The instruction usage is guarded by predicate in operand immXF().
// SSE counterpart of storeF_imm: same integer C7 /0 bit-pattern store.
instruct storeX_imm( memory mem, immXF src) %{
  match(Set mem (StoreF mem src));

  ins_cost(50);
  format %{ "MOV    $mem,$src\t# store float" %}
  opcode(0xC7);               /* C7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32XF_as_bits( src ));
  ins_pipe( ialu_mem_imm );
%}
7281
// Store Integer to stack slot
// Spill store: MOV r/m32,r32 (89) into a stack slot.
instruct storeSSI(stackSlotI dst, eRegI src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Integer to stack slot
// Pointer spill store: identical encoding, separate rule for typing.
instruct storeSSP(stackSlotP dst, eRegP src) %{
  match(Set dst src);

  ins_cost(100);
  format %{ "MOV    $dst,$src" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}

// Store Long to stack slot
// Long spill store: two 32-bit MOVs, low half then high half at slot+4.
instruct storeSSL(stackSlotL dst, eRegL src) %{
  match(Set dst src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}
7315
//----------MemBar Instructions-----------------------------------------------
// Memory barrier flavors
// All membars are size(0) except membar_volatile: on x86 they constrain
// instruction scheduling rather than emit code, except where an actual
// fence is required (see enc_membar_* encoders defined elsewhere).

instruct membar_acquire() %{
  match(MemBarAcquire);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-acquire" %}
  ins_encode( enc_membar_acquire );
  ins_pipe(pipe_slow);
%}

// Acquire immediately after a FastLock: the lock's CMPXCHG already
// provides the ordering, so this form is free (cost 0, empty encoding).
instruct membar_acquire_lock() %{
  match(MemBarAcquire);
  predicate(Matcher::prior_fast_lock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

instruct membar_release() %{
  match(MemBarRelease);
  ins_cost(400);

  size(0);
  format %{ "MEMBAR-release" %}
  ins_encode( enc_membar_release );
  ins_pipe(pipe_slow);
%}

// Release immediately before a FastUnlock: the unlock provides the
// ordering, so this form is free.
instruct membar_release_lock() %{
  match(MemBarRelease);
  predicate(Matcher::post_fast_unlock(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}

// Full fence (StoreLoad included); the only flavor that emits real code.
instruct membar_volatile() %{
  match(MemBarVolatile);
  ins_cost(400);

  format %{ "MEMBAR-volatile" %}
  ins_encode( enc_membar_volatile );
  ins_pipe(pipe_slow);
%}

// Elided when a following instruction already acts as a StoreLoad
// barrier (Matcher::post_store_load_barrier).
instruct unnecessary_membar_volatile() %{
  match(MemBarVolatile);
  predicate(Matcher::post_store_load_barrier(n));
  ins_cost(0);

  size(0);
  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
  ins_encode( );
  ins_pipe(empty);
%}
7380
//----------Move Instructions--------------------------------------------------
// int -> pointer cast.  Both operands are constrained to EAX (eAXRegP /
// eAXRegI), so the register allocator guarantees they coincide and no
// code is needed (empty encoding, cost 0).
instruct castX2P(eAXRegP dst, eAXRegI src) %{
  match(Set dst (CastX2P src));
  format %{ "# X2P  $dst, $src" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe(empty);
%}

// pointer -> int cast: a plain register-to-register copy.
instruct castP2X(eRegI dst, eRegP src ) %{
  match(Set dst (CastP2X src));
  ins_cost(50);
  format %{ "MOV    $dst, $src\t# CastP2X" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
7397
//----------Conditional Move---------------------------------------------------
// Conditional move
// CMOVcc r32,r/m32 is 0F 40+cc; enc_cmov combines the 0x40 base with the
// condition from $cop.  All forms require P6+ (supports_cmov).
instruct cmovI_reg(eRegI dst, eRegI src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Unsigned-flags variant (flags set by an unsigned compare).
instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// UCF (unordered-compare-flags) variant just expands into the U form.
instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, eRegI src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovI_regU(cop, cr, dst, src);
  %}
%}

// Conditional move
// Memory-source form: CMOVcc r32,m32 folds the load into the cmov.
instruct cmovI_mem(cmpOp cop, eFlagsReg cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

// Conditional move
instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cop $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}

instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
  ins_cost(250);
  expand %{
    cmovI_memU(cop, cr, dst, src);
  %}
%}
7459
// Conditional move
// Pointer CMOV, same CMOVcc encoding as the integer forms.
instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move (non-P6 version)
// Note:  a CMoveP is generated for  stubs and native wrappers
//        regardless of whether we are on a P6, so we
//        emulate a cmov here
// Emulation: branch on the inverted condition over a 2-byte MOV (8B /r);
// enc_cmov_branch's 0x2 argument is the number of bytes to skip.
instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(300);
  format %{ "Jn$cop   skip\n\t"
          "MOV    $dst,$src\t# pointer\n"
      "skip:" %}
  opcode(0x8b);
  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
  ins_pipe( pipe_cmov_reg );
%}

// Conditional move
// Unsigned-flags pointer CMOV.
instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst,$src\t# ptr" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}

instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovP_regU(cop, cr, dst, src);
  %}
%}

// DISABLED: Requires the ADLC to emit a bottom_type call that
// correctly meets the two pointer arguments; one is an incoming
// register but the other is a memory operand.  ALSO appears to
// be buggy with implicit null checks.
//
//// Conditional move
//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
//
//// Conditional move
//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
//  ins_cost(250);
//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
//  opcode(0x0F,0x40);
//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
//  ins_pipe( pipe_cmov_mem );
//%}
7532
// Conditional move
// x87 FCMOVcc (DA-prefixed) moves into ST(0); dst is constrained to
// regDPR1.  FCMOV only exists for the unsigned/unordered condition
// codes, hence the U flags operand here and branch emulation below for
// signed compares.
instruct fcmovD_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# double" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Conditional move
// Float version; reuses the double encoder enc_cmov_d (x87 registers
// are not width-specific).
instruct fcmovF_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "FCMOV$cop $dst,$src\t# float" %}
  opcode(0xDA);
  ins_encode( enc_cmov_d(cop,src) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
// Emulate with a 4-byte branch over a push/store register move
// (enc_cmov_branch's 0x4 is the byte count to skip).
instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOV    $dst,$src\t# double\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_D(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}

// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop    skip\n\t"
            "MOV    $dst,$src\t# float\n"
      "skip:" %}
  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_F(src), OpcP, RegOpc(dst) );
  ins_pipe( pipe_cmovD_reg );
%}
7580
// No CMOVE with SSE/SSE2
// XMM registers have no conditional move, so these emulate it with a
// short branch: jccb on the inverted condition ($cop$$cmpcode^1 flips
// the low bit of the condition code) over a register-to-register move.
instruct fcmovX_regS(cmpOp cop, eFlagsReg cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// No CMOVE with SSE/SSE2
// Double variant: same branch-over-move pattern, with movdbl.
instruct fcmovXD_regS(cmpOp cop, eFlagsReg cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// unsigned version
instruct fcmovX_regU(cmpOpU cop, eFlagsRegU cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSS  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

// UCF variant expands into the unsigned form above.
instruct fcmovX_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regX dst, regX src) %{
  predicate (UseSSE>=1);
  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regU(cop, cr, dst, src);
  %}
%}

// unsigned version
instruct fcmovXD_regU(cmpOpU cop, eFlagsRegU cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "Jn$cop   skip\n\t"
            "MOVSD  $dst,$src\t# float\n"
      "skip:" %}
  ins_encode %{
    Label skip;
    // Invert sense of branch from sense of CMOV
    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
    __ bind(skip);
  %}
  ins_pipe( pipe_slow );
%}

instruct fcmovXD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate (UseSSE>=2);
  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regU(cop, cr, dst, src);
  %}
%}
7670
// Long conditional move: one CMOVcc per 32-bit half (lo, then hi).
// Both halves use the same condition, so the pair moves atomically with
// respect to the flags.
instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// Unsigned-flags variant.
instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
            "CMOV$cop $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}

// UCF variant expands into the unsigned form.
instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
  predicate(VM_Version::supports_cmov() );
  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
  ins_cost(200);
  expand %{
    cmovL_regU(cop, cr, dst, src);
  %}
%}
7701
//----------Arithmetic Instructions--------------------------------------------
//----------Addition Instructions----------------------------------------------
// Integer Addition Instructions
// ADD r32,r/m32 (03 /r); register form is 2 bytes.  All ADD/INC/DEC
// forms clobber the flags (KILL cr).
instruct addI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// ADD r/m32,imm: 81 /0 id.  OpcSErm presumably selects the short
// sign-extended imm8 form (83 /0 ib) when the immediate fits; see the
// encoder definition to confirm.
instruct addI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddI dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81, 0x00); /* /0 id */
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Add 1: single-byte INC r32 (40+rd), guarded by UseIncDec.
instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "INC    $dst" %}
  opcode(0x40); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}

// Three-operand add via LEA (8D /r): dst = src0 + src1 without
// destroying src0 and without touching the flags (no KILL cr).
instruct leaI_eReg_immI(eRegI dst, eRegI src0, immI src1) %{
  match(Set dst (AddI src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Pointer flavor of the LEA add above.
instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
  match(Set dst (AddP src0 src1));
  ins_cost(110);

  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
  opcode(0x8D); /* 0x8D /r */
  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
  ins_pipe( ialu_reg_reg );
%}

// Add -1: single-byte DEC r32 (48+rd), guarded by UseIncDec.
instruct decI_eReg(eRegI dst, immI_M1 src, eFlagsReg cr) %{
  predicate(UseIncDec);
  match(Set dst (AddI dst src));
  effect(KILL cr);

  size(1);
  format %{ "DEC    $dst" %}
  opcode(0x48); /*  */
  ins_encode( Opc_plus( primary, dst ) );
  ins_pipe( ialu_reg );
%}
7769
// Add integer to pointer, register-register form (ADD r32,r/m32 = 0x03 /r).
instruct addP_eReg(eRegP dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  size(2);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
7780
// Add immediate to pointer; OpcSErm/Con8or32 pick the imm8 (83) or
// imm32 (81) encoding depending on the constant's magnitude.
instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
  match(Set dst (AddP dst src));
  effect(KILL cr);

  format %{ "ADD    $dst,$src" %}
  opcode(0x81,0x00); /* Opcode 81 /0 id */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
7791
// Add Memory to Register: folds the load into ADD r32,r/m32 (0x03 /r).
instruct addI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AddI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x03);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}
7802
// Add Register to Memory: folds load-add-store into ADD r/m32,r32 (0x01 /r).
instruct addI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "ADD    $dst,$src" %}
  opcode(0x01);  /* Opcode 01 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}
7813
// Add Memory with Immediate: read-modify-write ADD r/m32,imm (81/83 /0).
instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "ADD    $dst,$src" %}
  opcode(0x81);               /* Opcode 81 /0 id */
  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
7825
// Increment Memory by one: INC r/m32 (FF /0) applied directly in memory.
instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "INC    $dst" %}
  opcode(0xFF);               /* Opcode FF /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,dst));
  ins_pipe( ialu_mem_imm );
%}
7836
// Decrement Memory by one (AddI with -1): DEC r/m32 (FF /1) in memory.
instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "DEC    $dst" %}
  opcode(0xFF);               /* Opcode FF /1 */
  ins_encode( OpcP, RMopc_Mem(0x01,dst));
  ins_pipe( ialu_mem_imm );
%}
7847
7848
// CheckCastPP is a compile-time type assertion only; it emits no machine
// code (size 0, empty encoding) since dst already holds the value.
instruct checkCastPP( eRegP dst ) %{
  match(Set dst (CheckCastPP dst));

  size(0);
  format %{ "#checkcastPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}
7857
// CastPP: pointer type cast, no machine code emitted.
instruct castPP( eRegP dst ) %{
  match(Set dst (CastPP dst));
  format %{ "#castPP of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_pipe( empty );
%}
7864
// CastII: integer type-narrowing cast, no machine code emitted (cost 0).
instruct castII( eRegI dst ) %{
  match(Set dst (CastII dst));
  format %{ "#castII of $dst" %}
  ins_encode( /*empty encoding*/ );
  ins_cost(0);
  ins_pipe( empty );
%}
7872
7873
// Load-locked - same as a regular pointer load when used with compare-swap;
// the "locked" semantics come from the paired conditional store, not from
// any special encoding here (plain MOV r32,r/m32 = 0x8B /r).
instruct loadPLocked(eRegP dst, memory mem) %{
  match(Set dst (LoadPLocked mem));

  ins_cost(125);
  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
  opcode(0x8B);
  ins_encode( OpcP, RegMem(dst,mem));
  ins_pipe( ialu_reg_mem );
%}
7884
// LoadLong-locked - same as a volatile long load when used with compare-swap.
// x87 path (UseSSE<=1): FILD/FISTP gives an atomic 64-bit load on IA-32.
instruct loadLLocked(stackSlotL dst, load_long_memory mem) %{
  predicate(UseSSE<=1);
  match(Set dst (LoadLLocked mem));

  ins_cost(200);
  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
            "FISTp  $dst" %}
  ins_encode(enc_loadL_volatile(mem,dst));
  ins_pipe( fpu_reg_mem );
%}
7896
// SSE2 path: atomic 64-bit load via MOVSD through an XMM temporary,
// result delivered to a stack slot.
instruct loadLX_Locked(stackSlotL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(180);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVSD  $dst,$tmp" %}
  ins_encode(enc_loadLX_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
7907
// SSE2 path: atomic 64-bit load delivered to an integer register pair;
// MOVD extracts the low word, PSRLQ/MOVD the high word.
instruct loadLX_reg_Locked(eRegL dst, load_long_memory mem, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (LoadLLocked mem));
  effect(TEMP tmp);
  ins_cost(160);
  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
            "MOVD   $dst.lo,$tmp\n\t"
            "PSRLQ  $tmp,32\n\t"
            "MOVD   $dst.hi,$tmp" %}
  ins_encode(enc_loadLX_reg_volatile(mem, dst, tmp));
  ins_pipe( pipe_slow );
%}
7920
// Conditional-store of the updated heap-top.
// Used during allocation of the shared heap.
// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel
// (LOCK 0F B1 /r); oldval is constrained to EAX as CMPXCHG requires.
instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
  // EAX is killed if there is contention, but then it's also unused.
  // In the common case of no contention, EAX holds the new oop address.
  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
  ins_pipe( pipe_cmpxchg );
%}
7932
// Conditional-store of an int value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
// oldval (EAX) is KILLed: CMPXCHG overwrites EAX with the memory value on failure.
instruct storeIConditional( memory mem, eAXRegI oldval, eRegI newval, eFlagsReg cr ) %{
  match(Set cr (StoreIConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
  ins_pipe( pipe_cmpxchg );
%}
7942
// Conditional-store of a long value.
// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set cr (StoreLConditional mem (Binary oldval newval)));
  effect(KILL oldval);
  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
            "XCHG   EBX,ECX"
  %}
  ins_encode %{
    // Note: we need to swap rbx, and rcx before and after the
    //       cmpxchg8 instruction because the instruction uses
    //       rcx as the high order word of the new value to store but
    //       our register encoding uses rbx.
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
    if( os::is_MP() )
      __ lock();   // LOCK prefix only needed on multiprocessor systems
    __ cmpxchg8(Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp));
    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
  %}
  ins_pipe( pipe_cmpxchg );
%}
7965
7966// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7967
// 64-bit CAS via LOCK CMPXCHG8B; oldval pinned to EDX:EAX, newval to
// ECX:EBX as the instruction requires.  res is the boolean success flag
// materialized from ZF by enc_flags_ne_to_boolean.
instruct compareAndSwapL( eRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg8(mem_ptr),
              enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7980
// Pointer CAS via LOCK CMPXCHG; oldval pinned to EAX as required.
instruct compareAndSwapP( eRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
7992
// 32-bit integer CAS via LOCK CMPXCHG; oldval pinned to EAX as required.
instruct compareAndSwapI( eRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
  effect(KILL cr, KILL oldval);
  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
            "MOV    $res,0\n\t"
            "JNE,s  fail\n\t"
            "MOV    $res,1\n"
          "fail:" %}
  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
  ins_pipe( pipe_cmpxchg );
%}
8004
//----------Subtraction Instructions-------------------------------------------
// Integer Subtraction Instructions
// Register-register subtract: SUB r32,r/m32 (0x2B /r).
instruct subI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
8017
// Subtract immediate from register: 81/83 /5, short form for imm8.
instruct subI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (SubI dst src));
  effect(KILL cr);

  format %{ "SUB    $dst,$src" %}
  opcode(0x81,0x05);  /* Opcode 81 /5 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
8028
// Subtract memory from register: folds the load into SUB r32,r/m32 (0x2B /r).
instruct subI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (SubI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}
8039
// Subtract register from memory: read-modify-write SUB r/m32,r32 (0x29 /r).
instruct subI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "SUB    $dst,$src" %}
  opcode(0x29);  /* Opcode 29 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}
8050
// Subtract from a pointer: matches ptr + (0 - src) and emits a direct SUB.
instruct subP_eReg(eRegP dst, eRegI src, immI0 zero, eFlagsReg cr) %{
  match(Set dst (AddP dst (SubI zero src)));
  effect(KILL cr);

  size(2);
  format %{ "SUB    $dst,$src" %}
  opcode(0x2B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
8062
// Negate: matches (0 - dst) and emits NEG r/m32 (F7 /3).
instruct negI_eReg(eRegI dst, immI0 zero, eFlagsReg cr) %{
  match(Set dst (SubI zero dst));
  effect(KILL cr);

  size(2);
  format %{ "NEG    $dst" %}
  opcode(0xF7,0x03);  // Opcode F7 /3
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8073
8074
//----------Multiplication/Division Instructions-------------------------------
// Integer Multiplication Instructions
// Multiply Register: two-byte IMUL r32,r/m32 (0F AF /r).
instruct mulI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (MulI dst src));
  effect(KILL cr);

  size(3);
  ins_cost(300);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8089
// Multiply 32-bit Immediate: three-operand IMUL r32,r/m32,imm (69 /r id;
// OpcSE selects 6B /r ib for 8-bit immediates).
instruct mulI_eReg_imm(eRegI dst, eRegI src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI src imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8101
// Load a 32-bit-representable long constant into the low word (EAX) only.
// Used as the constant feed for the mulI_imm_high / mulI_imm_RShift_high
// patterns below, where only the low word of the constant matters.
instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
  match(Set dst src);
  effect(KILL cr);

  // Note that this is artificially increased to make it more expensive than loadConL
  ins_cost(250);
  format %{ "MOV    EAX,$src\t// low word only" %}
  opcode(0xB8);
  ins_encode( LdImmL_Lo(dst, src) );
  ins_pipe( ialu_reg_fat );
%}
8113
// Multiply by 32-bit Immediate, taking the shifted high order results
//  (special case for shift by 32)
// Matches (int)((src1 * constant) >> 32); the predicate walks the matched
// subtree to verify the long constant actually fits in 32 bits.
instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(0*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}
8129
// Multiply by 32-bit Immediate, taking the shifted high order results
// for shift counts 32..63: the extra (cnt-32) shift is done with SAR on EDX.
instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
  effect(USE src1, KILL cr);

  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
  ins_cost(1*100 + 1*400 - 150);
  format %{ "IMUL   EDX:EAX,$src1\n\t"
            "SAR    EDX,$cnt-32" %}
  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
  ins_pipe( pipe_slow );
%}
8145
// Multiply Memory 32-bit Immediate: IMUL r32,r/m32,imm with a memory operand.
instruct mulI_mem_imm(eRegI dst, memory src, immI imm, eFlagsReg cr) %{
  match(Set dst (MulI (LoadI src) imm));
  effect(KILL cr);

  ins_cost(300);
  format %{ "IMUL   $dst,$src,$imm" %}
  opcode(0x69);  /* 69 /r id */
  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
8157
// Multiply Memory: IMUL r32,r/m32 (0F AF /r) with a memory operand.
instruct mulI(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (MulI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(350);
  format %{ "IMUL   $dst,$src" %}
  opcode(0xAF, 0x0F);
  ins_encode( OpcS, OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem_alu0 );
%}
8169
// Multiply Register Int to Long: one-operand IMUL producing the full 64-bit
// product in EDX:EAX, hence the eADXRegL result and eAXRegI input pinning.
instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
  // Basic Idea: long = (long)int * (long)int
  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
  effect(DEF dst, USE src, USE src1, KILL flags);

  ins_cost(300);
  format %{ "IMUL   $dst,$src1" %}

  ins_encode( long_int_multiply( dst, src1 ) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8182
// Unsigned int-to-long multiply: one-operand MUL producing EDX:EAX.
instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
  effect(KILL flags);

  ins_cost(300);
  format %{ "MUL    $dst,$src1" %}

  ins_encode( long_uint_multiply(dst, src1) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8194
// Multiply Register Long: full 64x64->64 multiply composed of three 32-bit
// multiplies plus adds (terms above 2^64 are discarded).
instruct mulL_eReg(eADXRegL dst, eRegL src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(4*100+3*400);
// Basic idea: lo(result) = lo(x_lo * y_lo)
//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
  format %{ "MOV    $tmp,$src.lo\n\t"
            "IMUL   $tmp,EDX\n\t"
            "MOV    EDX,$src.hi\n\t"
            "IMUL   EDX,EAX\n\t"
            "ADD    $tmp,EDX\n\t"
            "MUL    EDX:EAX,$src.lo\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8212
// Multiply Register Long by small constant (fits in 8 bits): the constant's
// high word is zero, so only two multiplies are needed.
instruct mulL_eReg_con(eADXRegL dst, immL_127 src, eRegI tmp, eFlagsReg cr) %{
  match(Set dst (MulL dst src));
  effect(KILL cr, TEMP tmp);
  ins_cost(2*100+2*400);
  size(12);
// Basic idea: lo(result) = lo(src * EAX)
//             hi(result) = hi(src * EAX) + lo(src * EDX)
  format %{ "IMUL   $tmp,EDX,$src\n\t"
            "MOV    EDX,$src\n\t"
            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
            "ADD    EDX,$tmp" %}
  ins_encode( long_multiply_con( dst, src, tmp ) );
  ins_pipe( pipe_slow );
%}
8228
// Integer DIV with Register.
// cdq_enc emits the guard visible in the format string: min_jint / -1
// would raise #DE overflow in IDIV, so that case is special-cased to
// return min_jint with EDX zeroed before falling into CDQ/IDIV.
instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(Set rax (DivI rax div));
  effect(KILL rdx, KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8247
// Divide Register Long: no hardware 64/64 divide on IA-32, so this calls
// out to the SharedRuntime::ldiv helper with both operands on the stack.
instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (DivL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::ldiv\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_div(src1,src2) );
  ins_pipe( pipe_slow );
%}
8262
// Integer DIVMOD with Register, both quotient and mod results.
// A single IDIV yields quotient in EAX and remainder in EDX, matching the
// fused DivModI node; same min_jint / -1 guard as divI_eReg.
instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
  match(DivModI rax div);
  effect(KILL cr);
  size(26);
  ins_cost(30*100+10*100);
  format %{ "CMP    EAX,0x80000000\n\t"
            "JNE,s  normal\n\t"
            "XOR    EDX,EDX\n\t"
            "CMP    ECX,-1\n\t"
            "JE,s   done\n"
    "normal: CDQ\n\t"
            "IDIV   $div\n\t"
    "done:"        %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( pipe_slow );
%}
8281
// Integer MOD with Register: IDIV leaves the remainder in EDX,
// which is the result register here; EAX (quotient) is killed.
instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
  match(Set rdx (ModI rax div));
  effect(KILL rax, KILL cr);

  size(26);
  ins_cost(300);
  format %{ "CDQ\n\t"
            "IDIV   $div" %}
  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
  ins_encode( cdq_enc, OpcP, RegOpc(div) );
  ins_pipe( ialu_reg_reg_alu0 );
%}
8295
// Remainder Register Long: like divL_eReg, implemented as a call to the
// SharedRuntime::lrem helper.
instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
  match(Set dst (ModL src1 src2));
  effect( KILL cr, KILL cx, KILL bx );
  ins_cost(10000);
  format %{ "PUSH   $src1.hi\n\t"
            "PUSH   $src1.lo\n\t"
            "PUSH   $src2.hi\n\t"
            "PUSH   $src2.lo\n\t"
            "CALL   SharedRuntime::lrem\n\t"
            "ADD    ESP,16" %}
  ins_encode( long_mod(src1,src2) );
  ins_pipe( pipe_slow );
%}
8310
// Integer Shift Instructions
// Shift Left by one: short form SHL r/m32,1 (D1 /4).
instruct shlI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD1, 0x4);  /* D1 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8323
// Shift Left by 8-bit immediate: SHL r/m32,imm8 (C1 /4 ib).
instruct salI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}
8335
// Shift Left by variable count: SHL r/m32,CL (D3 /4); count pinned to ECX.
instruct salI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHL    $dst,$shift" %}
  opcode(0xD3, 0x4);  /* D3 /4 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8347
// Arithmetic shift right by one: SAR r/m32,1 (D1 /7).
instruct sarI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8359
// Arithmetic shift right by one, in memory: SAR r/m32,1 (D1 /7) as a
// read-modify-write on the memory operand.
instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD1, 0x7);  /* D1 /7 */
  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
  ins_pipe( ialu_mem_imm );
%}
8369
// Arithmetic Shift Right by 8-bit immediate: SAR r/m32,imm8 (C1 /7 ib).
instruct sarI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, shift ) );
  ins_pipe( ialu_mem_imm );
%}
8381
// Arithmetic Shift Right by 8-bit immediate, in memory (C1 /7 ib).
instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
  effect(KILL cr);

  format %{ "SAR    $dst,$shift" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
  ins_pipe( ialu_mem_imm );
%}
8392
// Arithmetic Shift Right by variable count: SAR r/m32,CL (D3 /7).
instruct sarI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SAR    $dst,$shift" %}
  opcode(0xD3, 0x7);  /* D3 /7 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg_reg );
%}
8404
// Logical shift right by one: SHR r/m32,1 (D1 /5).
instruct shrI_eReg_1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(2);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xD1, 0x5);  /* D1 /5 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8416
// Logical Shift Right by 8-bit immediate: SHR r/m32,imm8 (C1 /5 ib).
instruct shrI_eReg_imm(eRegI dst, immI8 shift, eFlagsReg cr) %{
  match(Set dst (URShiftI dst shift));
  effect(KILL cr);

  size(3);
  format %{ "SHR    $dst,$shift" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( RegOpcImm( dst, shift) );
  ins_pipe( ialu_reg );
%}
8428
8429
8430// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
8431// This idiom is used by the compiler for the i2b bytecode.
8432instruct i2b(eRegI dst, xRegI src, immI_24 twentyfour, eFlagsReg cr) %{
8433  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
8434  effect(KILL cr);
8435
8436  size(3);
8437  format %{ "MOVSX  $dst,$src :8" %}
8438  opcode(0xBE, 0x0F);
8439  ins_encode( OpcS, OpcP, RegReg( dst, src));
8440  ins_pipe( ialu_reg_reg );
8441%}
8442
// Shift Left by 16, followed by Arithmetic Shift Right by 16.
// This idiom is used by the compiler for the i2s bytecode; the shift pair
// sign-extends the low 16 bits, so MOVSX r32,r/m16 (0F BF /r) suffices.
instruct i2s(eRegI dst, xRegI src, immI_16 sixteen, eFlagsReg cr) %{
  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
  effect(KILL cr);

  size(3);
  format %{ "MOVSX  $dst,$src :16" %}
  opcode(0xBF, 0x0F);
  ins_encode( OpcS, OpcP, RegReg( dst, src));
  ins_pipe( ialu_reg_reg );
%}
8455
8456
8457// Logical Shift Right by variable
8458instruct shrI_eReg_CL(eRegI dst, eCXRegI shift, eFlagsReg cr) %{
8459  match(Set dst (URShiftI dst shift));
8460  effect(KILL cr);
8461
8462  size(2);
8463  format %{ "SHR    $dst,$shift" %}
8464  opcode(0xD3, 0x5);  /* D3 /5 */
8465  ins_encode( OpcP, RegOpc( dst ) );
8466  ins_pipe( ialu_reg_reg );
8467%}
8468
8469
8470//----------Logical Instructions-----------------------------------------------
8471//----------Integer Logical Instructions---------------------------------------
8472// And Instructions
8473// And Register with Register
8474instruct andI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
8475  match(Set dst (AndI dst src));
8476  effect(KILL cr);
8477
8478  size(2);
8479  format %{ "AND    $dst,$src" %}
8480  opcode(0x23);
8481  ins_encode( OpcP, RegReg( dst, src) );
8482  ins_pipe( ialu_reg_reg );
8483%}
8484
// And Register with Immediate: 81/83 /4, short form for imm8.
instruct andI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (AndI dst src));
  effect(KILL cr);

  format %{ "AND    $dst,$src" %}
  opcode(0x81,0x04);  /* Opcode 81 /4 */
  // ins_encode( RegImm( dst, src) );
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}
8496
// And Register with Memory: folds the load into AND r32,r/m32 (0x23 /r).
instruct andI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (AndI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x23);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}
8508
// And Memory with Register: read-modify-write AND r/m32,r32 (0x21 /r).
instruct andI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "AND    $dst,$src" %}
  opcode(0x21);  /* Opcode 21 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}
8520
// And Memory with Immediate: read-modify-write AND r/m32,imm (81/83 /4).
instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "AND    $dst,$src" %}
  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8533
// Or Instructions
// Or Register with Register: OR r32,r/m32 (0x0B /r).
instruct orI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (OrI dst src));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
8546
// Or the raw bits of a pointer (CastP2X) into an integer register; the
// cast costs nothing, so this is the same OR r32,r/m32 encoding.
instruct orI_eReg_castP2X(eRegI dst, eRegP src, eFlagsReg cr) %{
  match(Set dst (OrI dst (CastP2X src)));
  effect(KILL cr);

  size(2);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}
8557
8558
8559// Or Register with Immediate
8560instruct orI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
8561  match(Set dst (OrI dst src));
8562  effect(KILL cr);
8563
8564  format %{ "OR     $dst,$src" %}
8565  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8566  // ins_encode( RegImm( dst, src) );
8567  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8568  ins_pipe( ialu_reg );
8569%}
8570
// Or Register with Memory: folds the load into OR r32,r/m32 (0x0B /r).
instruct orI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (OrI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x0B);
  ins_encode( OpcP, RegMem( dst, src) );
  ins_pipe( ialu_reg_mem );
%}
8582
// Or Memory with Register: read-modify-write OR r/m32,r32 (0x09 /r).
instruct orI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "OR     $dst,$src" %}
  opcode(0x09);  /* Opcode 09 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}
8594
// Or Memory with Immediate: read-modify-write OR r/m32,imm (81/83 /1).
instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "OR     $dst,$src" %}
  opcode(0x81,0x1);  /* Opcode 81 /1 id */
  // ins_encode( MemImm( dst, src) );
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8607
// ROL/ROR
// ROL expand
// Expand-only building block (no match rule): ROL r/m32,1 (D1 /0).
instruct rolI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD1, 0x0); /* Opcode D1 /0 */
  ins_encode( OpcP, RegOpc( dst ));
  ins_pipe( ialu_reg );
%}
8618
// Expand-only building block: ROL r/m32,imm8 (C1 /0 ib).
instruct rolI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe(ialu_reg);
%}
8627
// Expand-only building block: ROL r/m32,CL (D3 /0); dst must not be ECX.
instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROL    $dst, $shift" %}
  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
8636// end of ROL expand
8637
8638// ROL 32bit by one once
8639instruct rolI_eReg_i1(eRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8640  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8641
8642  expand %{
8643    rolI_eReg_imm1(dst, lshift, cr);
8644  %}
8645%}
8646
// ROL 32bit var by imm8 once: the predicate requires the two shift counts
// to sum to a multiple of 32, i.e. a true rotate.
instruct rolI_eReg_i8(eRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));

  expand %{
    rolI_eReg_imm8(dst, lshift, cr);
  %}
%}
8656
// ROL 32bit var by var once: (x << s) | (x >>> (0 - s)).
instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
8665
// ROL 32bit var by var once: (x << s) | (x >>> (32 - s)).
instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));

  expand %{
    rolI_eReg_CL(dst, shift, cr);
  %}
%}
8674
// ROR expand
// Expand-only building block: ROR r/m32,1 (D1 /1).
instruct rorI_eReg_imm1(eRegI dst, immI1 shift, eFlagsReg cr) %{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD1,0x1);  /* Opcode D1 /1 */
  ins_encode( OpcP, RegOpc( dst ) );
  ins_pipe( ialu_reg );
%}
8684
// Expand-only building block: ROR r/m32,imm8 (C1 /1 ib).
instruct rorI_eReg_imm8(eRegI dst, immI8 shift, eFlagsReg cr) %{
  effect (USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
  ins_encode( RegOpcImm(dst, shift) );
  ins_pipe( ialu_reg );
%}
8693
// Expand-only building block: ROR r/m32,CL (D3 /1); dst must not be ECX.
instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
  effect(USE_DEF dst, USE shift, KILL cr);

  format %{ "ROR    $dst, $shift" %}
  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
  ins_encode(OpcP, RegOpc(dst));
  ins_pipe( ialu_reg_reg );
%}
8703
8704// ROR right once
8705instruct rorI_eReg_i1(eRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8706  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8707
8708  expand %{
8709    rorI_eReg_imm1(dst, rshift, cr);
8710  %}
8711%}
8712
8713// ROR 32bit by immI8 once
8714instruct rorI_eReg_i8(eRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8715  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8716  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8717
8718  expand %{
8719    rorI_eReg_imm8(dst, rshift, cr);
8720  %}
8721%}
8722
8723// ROR 32bit var by var once
8724instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8725  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8726
8727  expand %{
8728    rorI_eReg_CL(dst, shift, cr);
8729  %}
8730%}
8731
8732// ROR 32bit var by var once
8733instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8734  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8735
8736  expand %{
8737    rorI_eReg_CL(dst, shift, cr);
8738  %}
8739%}
8740
// Xor Instructions
// Xor Register with Register
instruct xorI_eReg(eRegI dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  size(2);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegReg( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Xor Register with Immediate -1
// XOR with all-ones is bitwise NOT.  NOT leaves EFLAGS untouched, which is
// why this rule (unlike the other XOR rules) has no KILL cr effect.
instruct xorI_eReg_im1(eRegI dst, immI_M1 imm) %{
  match(Set dst (XorI dst imm));  

  size(2);
  format %{ "NOT    $dst" %}  
  ins_encode %{
     __ notl($dst$$Register);
  %}
  ins_pipe( ialu_reg );
%}

// Xor Register with Immediate
instruct xorI_eReg_imm(eRegI dst, immI src, eFlagsReg cr) %{
  match(Set dst (XorI dst src));
  effect(KILL cr);

  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x06);  /* Opcode 81 /6 id */
  // ins_encode( RegImm( dst, src) );
  // OpcSErm picks the sign-extended 8-bit form (83 /6 ib) when the
  // immediate fits in a byte; Con8or32 emits the matching-width constant.
  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
  ins_pipe( ialu_reg );
%}

// Xor Register with Memory
instruct xorI_eReg_mem(eRegI dst, memory src, eFlagsReg cr) %{
  match(Set dst (XorI dst (LoadI src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x33);
  ins_encode( OpcP, RegMem(dst, src) );
  ins_pipe( ialu_reg_mem );
%}

// Xor Memory with Register
// Read-modify-write form: folds load, xor and store into one instruction.
instruct xorI_mem_eReg(memory dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(150);
  format %{ "XOR    $dst,$src" %}
  opcode(0x31);  /* Opcode 31 /r */
  ins_encode( OpcP, RegMem( src, dst ) );
  ins_pipe( ialu_mem_reg );
%}

// Xor Memory with Immediate
instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
  effect(KILL cr);

  ins_cost(125);
  format %{ "XOR    $dst,$src" %}
  opcode(0x81,0x6);  /* Opcode 81 /6 id */
  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
  ins_pipe( ialu_mem_imm );
%}
8813
//----------Convert Int to Boolean---------------------------------------------
// Conv2B produces (src != 0) ? 1 : 0.  Implemented as:
//   dst = src; NEG dst (sets CF iff src != 0); ADC dst,src
// so dst = -src + src + CF = CF, i.e. exactly 0 or 1.

// Expand-only helper: plain register copy.
instruct movI_nocopy(eRegI dst, eRegI src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: the NEG/ADC trick (0x13 is ADC r32,r/m32).
instruct ci2b( eRegI dst, eRegI src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );

  size(4);
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Int -> boolean: copy src then apply the NEG/ADC sequence.
instruct convI2B( eRegI dst, eRegI src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movI_nocopy(dst,src);
    ci2b(dst,src,cr);
  %}
%}

// Expand-only helper: copy a pointer register into an int register.
instruct movP_nocopy(eRegI dst, eRegP src) %{
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src" %}
  ins_encode( enc_Copy( dst, src) );
  ins_pipe( ialu_reg_reg );
%}

// Expand-only helper: same NEG/ADC trick for a pointer source.
instruct cp2b( eRegI dst, eRegP src, eFlagsReg cr ) %{
  effect( USE_DEF dst, USE src, KILL cr );
  format %{ "NEG    $dst\n\t"
            "ADC    $dst,$src" %}
  ins_encode( neg_reg(dst),
              OpcRegReg(0x13,dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Pointer -> boolean (null check result): 1 if non-null, 0 if null.
instruct convP2B( eRegI dst, eRegP src, eFlagsReg cr ) %{
  match(Set dst (Conv2B src));

  expand %{
    movP_nocopy(dst,src);
    cp2b(dst,src,cr);
  %}
%}
8867
// CmpLTMask: dst = (p < q) ? -1 : 0 (an all-ones/all-zeros mask).
instruct cmpLTMask( eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask p q));
  effect( KILL cr );
  ins_cost(400);

  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
  // (hence dst is pinned to ECX and p,q are kept out of it).
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $p,$q\n\t"
            "SETlt  $dst\n\t"
            "NEG    $dst" %}
  ins_encode( OpcRegReg(0x33,dst,dst),
              OpcRegReg(0x3B,p,q),
              setLT_reg(dst), neg_reg(dst) );
  ins_pipe( pipe_slow );
%}

// Special case q == 0: an arithmetic shift by 31 smears the sign bit,
// giving -1 when dst < 0 and 0 otherwise -- one cheap instruction.
instruct cmpLTMask0( eRegI dst, immI0 zero, eFlagsReg cr ) %{
  match(Set dst (CmpLTMask dst zero));
  effect( DEF dst, KILL cr );
  ins_cost(100);

  format %{ "SAR    $dst,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( RegOpcImm( dst, 0x1F ) );
  ins_pipe( ialu_reg );
%}


// p = (p < q) ? (p - q + y) : (p - q), computed branch-free:
// SBB ECX,ECX after SUB yields -1 on borrow (p < q) else 0; AND with y
// then conditionally adds y.
instruct cadd_cmpLTMask( ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
  effect( KILL tmp, KILL cr );
  ins_cost(400);
  // annoyingly, $tmp has no edges so you can't ask for it in
  // any format or encoding
  format %{ "SUB    $p,$q\n\t"
            "SBB    ECX,ECX\n\t"
            "AND    ECX,$y\n\t"
            "ADD    $p,ECX" %}
  ins_encode( enc_cmpLTP(p,q,y,tmp) );
  ins_pipe( pipe_cmplt );
%}

/* If I enable this, I encourage spilling in the inner loop of compress.
instruct cadd_cmpLTMask_mem( ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr ) %{
  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
  effect( USE_KILL tmp, KILL cr );
  ins_cost(400);

  format %{ "SUB    $p,$q\n\t"
            "SBB    ECX,ECX\n\t"
            "AND    ECX,$y\n\t"
            "ADD    $p,ECX" %}
  ins_encode( enc_cmpLTP_mem(p,q,y,tmp) );
%}
*/
8923
//----------Long Instructions------------------------------------------------
// 64-bit values live in register pairs on x86_32; arithmetic is done on the
// low halves first, then on the high halves with the carry/borrow chained
// through (ADD/ADC, SUB/SBB).

// Add Long Register with Register
instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x03, 0x13);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Add Long Register with Immediate
instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AddL dst src));
  effect(KILL cr);
  format %{ "ADD    $dst.lo,$src.lo\n\t"
            "ADC    $dst.hi,$src.hi" %}
  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Add Long Register with Memory
// The high half is loaded from mem+4 (little-endian layout).
instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AddL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "ADD    $dst.lo,$mem\n\t"
            "ADC    $dst.hi,$mem+4" %}
  opcode(0x03, 0x13);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Subtract Long Register with Register.
instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x2B, 0x1B);
  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Subtract Long Register with Immediate
instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (SubL dst src));
  effect(KILL cr);
  format %{ "SUB    $dst.lo,$src.lo\n\t"
            "SBB    $dst.hi,$src.hi" %}
  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Subtract Long Register with Memory
instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (SubL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "SUB    $dst.lo,$mem\n\t"
            "SBB    $dst.hi,$mem+4" %}
  opcode(0x2B, 0x1B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}

// Negate Long: 0 - dst, done as NEG hi / NEG lo / SBB hi,0 to propagate
// the borrow from the low half into the already-negated high half.
instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
  match(Set dst (SubL zero dst));
  effect(KILL cr);
  ins_cost(300);
  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
  ins_encode( neg_long(dst) );
  ins_pipe( ialu_reg_reg_long );
%}
9003
// And Long Register with Register
// Bitwise ops have no carry between halves; each half is ANDed independently.
instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x23,0x23);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// And Long Register with Immediate
instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (AndL dst src));
  effect(KILL cr);
  format %{ "AND    $dst.lo,$src.lo\n\t"
            "AND    $dst.hi,$src.hi" %}
  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// And Long Register with Memory
// High half comes from mem+4 (little-endian long layout).
instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (AndL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "AND    $dst.lo,$mem\n\t"
            "AND    $dst.hi,$mem+4" %}
  opcode(0x23, 0x23);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9037
// Or Long Register with Register
// As with AND, the two halves are ORed independently.
instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x0B,0x0B);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Or Long Register with Immediate
instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (OrL dst src));
  effect(KILL cr);
  format %{ "OR     $dst.lo,$src.lo\n\t"
            "OR     $dst.hi,$src.hi" %}
  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Or Long Register with Memory
instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (OrL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "OR     $dst.lo,$mem\n\t"
            "OR     $dst.hi,$mem+4" %}
  opcode(0x0B,0x0B);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9071
// Xor Long Register with Register
instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x33,0x33);
  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
  ins_pipe( ialu_reg_reg_long );
%}

// Xor Long Register with Immediate -1
// XOR with all-ones is bitwise NOT of both halves; NOT leaves EFLAGS
// untouched, so no KILL cr effect is needed here.
instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
  match(Set dst (XorL dst imm));  
  format %{ "NOT    $dst.lo\n\t"
            "NOT    $dst.hi" %}
  ins_encode %{
     __ notl($dst$$Register);
     __ notl(HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Immediate
instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
  match(Set dst (XorL dst src));
  effect(KILL cr);
  format %{ "XOR    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$src.hi" %}
  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
  ins_pipe( ialu_reg_long );
%}

// Xor Long Register with Memory
instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
  match(Set dst (XorL dst (LoadL mem)));
  effect(KILL cr);
  ins_cost(125);
  format %{ "XOR    $dst.lo,$mem\n\t"
            "XOR    $dst.hi,$mem+4" %}
  opcode(0x33,0x33);
  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
  ins_pipe( ialu_reg_long_mem );
%}
9117
// Shift Left Long by 1
// Under UseNewLongLShift, a 64-bit left shift by 1 is done as an
// add-with-carry pair (lo+lo, hi+hi+carry) instead of SHLD/SHL.
instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 2
// Two iterations of the ADD/ADC doubling sequence.
instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 3
// Three iterations of the ADD/ADC doubling sequence.
instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
  predicate(UseNewLongLShift);
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(100);
  format %{ "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi\n\t" 
            "ADD    $dst.lo,$dst.lo\n\t"
            "ADC    $dst.hi,$dst.hi" %}
  ins_encode %{
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
    __ addl($dst$$Register,$dst$$Register);
    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
  %}
  ins_pipe( ialu_reg_long );
%}
9174
// Shift Left Long by 1-31
// SHLD funnels the top bits of lo into hi, then SHL shifts lo.
instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
            "SHL    $dst.lo,$cnt" %}
  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by 32-63
// Counts >= 32: lo moves wholesale into hi (shifted by cnt-32) and lo is zeroed.
instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (LShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.hi,$dst.lo\n"
          "\tSHL    $dst.hi,$cnt-32\n"
          "\tXOR    $dst.lo,$dst.lo" %}
  opcode(0xC1, 0x4);  /* C1 /4 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Left Long by variable
// SHLD/SHL only handle counts 0-31, so bit 5 of the count is tested first;
// if set, lo is pre-moved into hi and lo cleared before the 0-31 shift.
instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (LShiftL dst shift));
  effect(KILL cr);
  ins_cost(500+200);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.hi,$dst.lo\n\t"
            "XOR    $dst.lo,$dst.lo\n"
    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
            "SHL    $dst.lo,$shift" %}
  ins_encode( shift_left_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9215
// Shift Right (logical) Long by 1-31
// SHRD funnels the low bits of hi into lo, then SHR zero-fills hi.
instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SHR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by 32-63
// Counts >= 32: hi moves into lo (shifted by cnt-32) and hi is zeroed.
instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (URShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSHR    $dst.lo,$cnt-32\n"
          "\tXOR    $dst.hi,$dst.hi" %}
  opcode(0xC1, 0x5);  /* C1 /5 ib */
  ins_encode( move_long_big_shift_clr(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right (logical) Long by variable
// Bit 5 of the count selects the >=32 path (hi into lo, zero hi) before
// the 0-31 SHRD/SHR pair.
instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (URShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(17);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "XOR    $dst.hi,$dst.hi\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SHR    $dst.hi,$shift" %}
  ins_encode( shift_right_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9256
// Shift Right arithmetic Long by 1-31
// Like the logical form, but SAR on hi replicates the sign bit.
instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(200);
  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
            "SAR    $dst.hi,$cnt" %}
  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
  ins_encode( move_long_small_shift(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by 32-63
// hi moves into lo (shifted by cnt-32); hi becomes the sign smear (SAR 31).
instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
  match(Set dst (RShiftL dst cnt));
  effect(KILL cr);
  ins_cost(300);
  format %{ "MOV    $dst.lo,$dst.hi\n"
          "\tSAR    $dst.lo,$cnt-32\n"
          "\tSAR    $dst.hi,31" %}
  opcode(0xC1, 0x7);  /* C1 /7 ib */
  ins_encode( move_long_big_shift_sign(dst,cnt) );
  ins_pipe( ialu_reg_long );
%}

// Shift Right arithmetic Long by variable
// Bit 5 of the count selects the >=32 path (hi into lo, sign-fill hi)
// before the 0-31 SHRD/SAR pair.
instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
  match(Set dst (RShiftL dst shift));
  effect(KILL cr);
  ins_cost(600);
  size(18);
  format %{ "TEST   $shift,32\n\t"
            "JEQ,s  small\n\t"
            "MOV    $dst.lo,$dst.hi\n\t"
            "SAR    $dst.hi,31\n"
    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
            "SAR    $dst.hi,$shift" %}
  ins_encode( shift_right_arith_long( dst, shift ) );
  ins_pipe( pipe_slow );
%}
9297
9298
//----------Double Instructions------------------------------------------------
// Double Math

// Compare & branch

// P6 version of float compare, sets condition codes in EFLAGS
// FUCOMIP sets PF on an unordered (NaN) compare; the fixup forces CF=1 via
// MOV ah,1 / SAHF so a NaN operand reads as "less than" downstream.
instruct cmpD_cc_P6(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Same compare for contexts that only consume CF/ZF (eFlagsRegUCF), so the
// NaN fixup sequence can be omitted.
instruct cmpD_cc_P6CF(eFlagsRegUCF cr, regD src1, regD src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE <=1);
  match(Set cr (CmpD src1 src2));
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}

// Compare & branch
// Pre-P6 path: FCOMP stores into the FPU status word, which must be moved
// through AX (FNSTSW AX / SAHF) to reach EFLAGS; unordered is forced to LT.
instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2, eAXRegI rax) %{
  predicate(UseSSE<=1);
  match(Set cr (CmpD src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
instruct cmpD_0(eRegI dst, regD src1, immD0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTD  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 (the three-way CmpD3 result used by dcmpl/dcmpg)
instruct cmpD_reg(eRegI dst, regD src1, regD src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPD  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
9382
// float compare and set condition codes in EFLAGS by XMM regs
// COMISD sets PF on an unordered (NaN) compare; MOV ah,1 / SAHF forces CF=1
// so NaN reads as "less than" (same fixup as the FPU path).
instruct cmpXD_cc(eFlagsRegU cr, regXD dst, regXD src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  effect(KILL rax);
  ins_cost(125);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// CF/ZF-only consumers (eFlagsRegUCF) don't need the NaN fixup.
instruct cmpXD_ccCF(eFlagsRegUCF cr, regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst src));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// Memory-operand variant of cmpXD_cc (folds the load into COMISD).
instruct cmpXD_ccmem(eFlagsRegU cr, regXD dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISD $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Memory-operand variant for CF/ZF-only consumers, no NaN fixup.
instruct cmpXD_ccmemCF(eFlagsRegUCF cr, regXD dst, memory src) %{
  predicate(UseSSE>=2);
  match(Set cr (CmpD dst (LoadD src)));
  ins_cost(100);
  format %{ "COMISD $dst,$src" %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
9434
// Compare into -1,0,1 in XMM
// Three-way CmpD3 result: 0 if equal, 1 if greater, -1 if less or unordered
// (PF taken to the 'nan' label, which decrements like the less-than case).
instruct cmpXD_reg(eRegI dst, regXD src1, regXD src2, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISD $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, Opcode(tertiary), RegReg(src1, src2),
             CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Same as above with the second operand loaded from memory; dst is zeroed
// with a MOV after COMISD because XOR before it would clobber the flags.
instruct cmpXD_regmem(eRegI dst, regXD src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (CmpD3 src1 (LoadD mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISD $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x66, 0x0F, 0x2F);
  ins_encode(OpcP, OpcS, Opcode(tertiary), RegMem(src1, mem),
             LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
9478
9479
// Subtract double, FPU stack form: push src, reverse-subtract-and-pop into dst.
instruct subD_reg(regD dst, regD src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst src));

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}

// Subtract with an explicit store-and-round to a stack slot (strictfp-style
// rounding of the 80-bit intermediate down to 64 bits).
instruct subD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate (UseSSE <=1);
  match(Set dst (RoundDouble (SubD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DSUB   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x5);
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Subtract double with the right operand loaded from memory.
instruct subD_reg_mem(regD dst, memory src) %{
  predicate (UseSSE <=1);
  match(Set dst (SubD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DSUBp  $dst,ST" %}
  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}
9520
// Absolute value on the FPU: FABS operates implicitly on ST(0), so both
// operands are pinned to the top-of-stack register class regDPR1.
instruct absD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (AbsD src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 absolute value: clear the sign bit by ANDing with a constant mask.
instruct absXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (AbsD dst));
  format %{ "ANDPD  $dst,[0x7FFFFFFFFFFFFFFF]\t# ABS D by sign masking" %}
  ins_encode( AbsXD_encoding(dst));
  ins_pipe( pipe_slow );
%}

// Negate on the FPU: FCHS operates implicitly on ST(0).
instruct negD_reg(regDPR1 dst, regDPR1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (NegD src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}

// SSE2 negate: flip the sign bit by XORing with a constant-pool mask.
instruct negXD_reg( regXD dst ) %{
  predicate(UseSSE>=2);
  match(Set dst (NegD dst));
  format %{ "XORPD  $dst,[0x8000000000000000]\t# CHS D by sign flipping" %}
  ins_encode %{
     __ xorpd($dst$$XMMRegister,
              ExternalAddress((address)double_signflip_pool));
  %}
  ins_pipe( pipe_slow );
%}
9558%}
9559
// Add double, FPU stack form: push src, add-and-pop into dst.
instruct addD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  format %{ "FLD    $src\n\t"
            "DADD   $dst,ST" %}
  size(4);
  ins_cost(150);
  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}


// Add with an explicit store-and-round to a stack slot (rounds the 80-bit
// FPU intermediate down to 64 bits).
instruct addD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble (AddD src1 src2)));
  ins_cost(250);

  format %{ "FLD    $src2\n\t"
            "DADD   ST,$src1\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
  ins_encode( Push_Reg_D(src2),
              OpcP, RegOpc(src1), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}


// Add double with the right operand loaded from memory.
instruct addD_reg_mem(regD dst, memory src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst (LoadD src)));
  ins_cost(150);

  format %{ "FLD    $src\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

// add-to-memory
// Read-modify-write: load dst, add src, store back.  set_instruction_start
// resets the bookkeeping before the final store opcode is emitted.
instruct addD_mem_reg(memory dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
  ins_cost(150);

  format %{ "FLD_D  $dst\n\t"
            "DADD   ST,$src\n\t"
            "FST_D  $dst" %}
  opcode(0xDD, 0x0);
  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
              Opcode(0xD8), RegOpc(src),
              set_instruction_start,
              Opcode(0xDD), RMopc_Mem(0x03,dst) );
  ins_pipe( fpu_reg_mem );
%}

// Add the constant 1.0 (loadable with the short FLD1 form).
instruct addD_reg_imm1(regD dst, immD1 src) %{
  predicate(UseSSE<=1);
  match(Set dst (AddD dst src));
  ins_cost(125);
  format %{ "FLD1\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x00);
  ins_encode( LdImmD(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg );
%}

// Add a general double constant; the predicate excludes 0.0 and 1.0,
// which are handled by cheaper special-case rules.
instruct addD_reg_imm(regD dst, immD src) %{
  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (AddD dst src));
  ins_cost(200);
  format %{ "FLD_D  [$src]\n\t"
            "DADDp  $dst,ST" %}
  opcode(0xDE, 0x00);       /* DE /0 */
  ins_encode( LdImmD(src),
              OpcP, RegOpc(dst));
  ins_pipe( fpu_reg_mem );
%}

// Add a double constant with an explicit round-to-memory of the result.
instruct addD_reg_imm_round(stackSlotD dst, regD src, immD con) %{
  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (RoundDouble (AddD src con)));
  ins_cost(200);
  format %{ "FLD_D  [$con]\n\t"
            "DADD   ST,$src\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x00);       /* D8 /0 */
  ins_encode( LdImmD(con),
              OpcP, RegOpc(src), Pop_Mem_D(dst));
  ins_pipe( fpu_mem_reg_con );
%}
9655
// Add two double precision floating point values in xmm
// (bytes F2 0F 58 = ADDSD, emitted literally below)
instruct addXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst src));
  format %{ "ADDSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// ADDSD with a double constant loaded via LdImmXD.
instruct addXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst con));
  format %{ "ADDSD  $dst,[$con]" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), LdImmXD(dst, con) );
  ins_pipe( pipe_slow );
%}

// ADDSD with a memory operand (cisc form).
instruct addXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (AddD dst (LoadD mem)));
  format %{ "ADDSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x58), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Sub two double precision floating point values in xmm
// (bytes F2 0F 5C = SUBSD)
instruct subXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst src));
  format %{ "SUBSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// SUBSD with a double constant.
instruct subXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst con));
  format %{ "SUBSD  $dst,[$con]" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), LdImmXD(dst, con) );
  ins_pipe( pipe_slow );
%}

// SUBSD with a memory operand.
instruct subXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SubD dst (LoadD mem)));
  format %{ "SUBSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}

// Mul two double precision floating point values in xmm
// (bytes F2 0F 59 = MULSD)
instruct mulXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst src));
  format %{ "MULSD  $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// MULSD with a double constant.
instruct mulXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst con));
  format %{ "MULSD  $dst,[$con]" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), LdImmXD(dst, con) );
  ins_pipe( pipe_slow );
%}

// MULSD with a memory operand.
instruct mulXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (MulD dst (LoadD mem)));
  format %{ "MULSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
9730
9731// Div two double precision floating point values in xmm
9732instruct divXD_reg(regXD dst, regXD src) %{
9733  predicate(UseSSE>=2);
9734  match(Set dst (DivD dst src));
9735  format %{ "DIVSD  $dst,$src" %}
9736  opcode(0xF2, 0x0F, 0x5E);
9737  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
9738  ins_pipe( pipe_slow );
9739%}
9740
// DIVSD with a double constant.
instruct divXD_imm(regXD dst, immXD con) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst con));
  format %{ "DIVSD  $dst,[$con]" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), LdImmXD(dst, con));
  ins_pipe( pipe_slow );
%}

// DIVSD with a memory operand.
instruct divXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (DivD dst (LoadD mem)));
  format %{ "DIVSD  $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
9756
9757
// x87 double multiply (non-strict path): push $src, DMULp into $dst.
instruct mulD_reg(regD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MulD dst src));
  format %{ "FLD    $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9769
// Strict FP instruction biases argument before multiply then
// biases result to avoid double rounding of subnormals.
//
// scale arg1 by multiplying arg1 by 2^(-15360)
// load arg2
// multiply scaled arg1 by arg2
// rescale product by 2^(15360)
//
// dst is pinned to FPR1 (regDPR1) and src must not be FPR1 (regnotDPR1)
// so the bias constants can be multiplied onto the destination in place.
instruct strictfp_mulD_reg(regDPR1 dst, regnotDPR1 src) %{
  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
  match(Set dst (MulD dst src));
  ins_cost(1);   // Select this instruction for all strict FP double multiplies

  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    $src\n\t"
            "DMULp  $dst,ST\n\t"
            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
            "DMULp  $dst,ST\n\t" %}
  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
  ins_encode( strictfp_bias1(dst),
              Push_Reg_D(src),
              OpcP, RegOpc(dst),
              strictfp_bias2(dst) );
  ins_pipe( fpu_reg_reg );
%}
9796
// Multiply by an arbitrary double constant (0.0 and 1.0 excluded;
// those are strength-reduced elsewhere).
instruct mulD_reg_imm(regD dst, immD src) %{
  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
  match(Set dst (MulD dst src));
  ins_cost(200);
  format %{ "FLD_D  [$src]\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1); /* DE /1 */
  ins_encode( LdImmD(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Multiply by a double loaded from memory (load folded into the op).
instruct mulD_reg_mem(regD dst, memory src) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD dst (LoadD src)));
  ins_cost(200);
  format %{ "FLD_D  $src\n\t"
            "DMULp  $dst,ST" %}
  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_mem );
%}

//
// Cisc-alternate to reg-reg multiply
// (three-operand form: dst = src * [mem], result popped into dst)
instruct mulD_reg_mem_cisc(regD dst, regD src, memory mem) %{
  predicate( UseSSE<=1 );
  match(Set dst (MulD src (LoadD mem)));
  ins_cost(250);
  format %{ "FLD_D  $mem\n\t"
            "DMUL   ST,$src\n\t"
            "FSTP_D $dst" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
              OpcReg_F(src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_reg_mem );
%}
9837
9838
// MACRO3 -- addD a mulD
// This instruction is a '2-address' instruction in that the result goes
// back to src2.  This eliminates a move from the macro; possibly the
// register allocator will have to add it back (and maybe not).
instruct addD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (AddD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DADDp  $src2,ST" %}
  ins_cost(250);
  opcode(0xDD); /* LoadD DD /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}


// MACRO3 -- subD a mulD
// Same 2-address shape as above; DE E0+i is FSUBRP (reverse subtract,
// pop) so the result lands back in src2.
instruct subD_mulD_reg(regD src2, regD src1, regD src0) %{
  predicate( UseSSE<=1 );
  match(Set src2 (SubD (MulD src0 src1) src2));
  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
            "DMUL   ST,$src1\n\t"
            "DSUBRp $src2,ST" %}
  ins_cost(250);
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              Opcode(0xDE), Opc_plus(0xE0,src2));
  ins_pipe( fpu_reg_reg_reg );
%}
9871
9872
// x87 double divide (non-strict path): push $src, FDIVp into $dst.
instruct divD_reg(regD dst, regD src) %{
  predicate( UseSSE<=1 );
  match(Set dst (DivD dst src));

  format %{ "FLD    $src\n\t"
            "FDIVp  $dst,ST" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_cost(150);
  ins_encode( Push_Reg_D(src),
              OpcP, RegOpc(dst) );
  ins_pipe( fpu_reg_reg );
%}
9885
9886// Strict FP instruction biases argument before division then
9887// biases result, to avoid double rounding of subnormals.
9888//
9889// scale dividend by multiplying dividend by 2^(-15360)
9890// load divisor
9891// divide scaled dividend by divisor
9892// rescale quotient by 2^(15360)
9893//
9894instruct strictfp_divD_reg(regDPR1 dst, regnotDPR1 src) %{
9895  predicate (UseSSE<=1);
9896  match(Set dst (DivD dst src));
9897  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9898  ins_cost(01);
9899
9900  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9901            "DMULp  $dst,ST\n\t"
9902            "FLD    $src\n\t"
9903            "FDIVp  $dst,ST\n\t"
9904            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9905            "DMULp  $dst,ST\n\t" %}
9906  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9907  ins_encode( strictfp_bias1(dst),
9908              Push_Reg_D(src),
9909              OpcP, RegOpc(dst),
9910              strictfp_bias2(dst) );
9911  ins_pipe( fpu_reg_reg );
9912%}
9913
// Divide and round to a stack slot; only valid off the strict-FP path
// (the strict version must use the biased strictfp_divD_reg above).
instruct divD_reg_round(stackSlotD dst, regD src1, regD src2) %{
  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
  match(Set dst (RoundDouble (DivD src1 src2)));

  format %{ "FLD    $src1\n\t"
            "FDIV   ST,$src2\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2), Pop_Mem_D(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
9926
9927
// x87 double remainder via the emitModD() FPREM loop.
instruct modD_reg(regD dst, regD src, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE<=1);
  match(Set dst (ModD dst src));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "DMOD   $dst,$src" %}
  ins_cost(250);
  ins_encode(Push_Reg_Mod_D(dst, src),
              emitModD(),
              Push_Result_Mod_D(src),
              Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}

// SSE2 double remainder: bounce both operands through the stack onto the
// x87 unit, run the FPREM loop, and move the result back to xmm.
instruct modXD_reg(regXD dst, regXD src0, regXD src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=2);
  match(Set dst (ModD src0 src1));
  effect(KILL rax, KILL cr);

  format %{ "SUB    ESP,8\t # DMOD\n"
          "\tMOVSD  [ESP+0],$src1\n"
          "\tFLD_D  [ESP+0]\n"
          "\tMOVSD  [ESP+0],$src0\n"
          "\tFLD_D  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_D [ESP+0]\n"
          "\tMOVSD  $dst,[ESP+0]\n"
          "\tADD    ESP,8\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModD_encoding(src0, src1), emitModD(), Push_ResultXD(dst), PopFPU);
  ins_pipe( pipe_slow );
%}
9966
// x87 sine: operand lives on top of the FPU stack (both operands are
// regDPR1), so the encoding is just D9 FE = FSIN in place.
instruct sinD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (SinD src));
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// SSE2 sine: spill xmm operand to the x87 stack, FSIN, move result back.
instruct sinXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (SinD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DSIN   $dst" %}
  opcode(0xD9, 0xFE);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 cosine (D9 FF = FCOS), same stack-top convention as sinD_reg.
instruct cosD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst (CosD src));
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( OpcP, OpcS );
  ins_pipe( pipe_slow );
%}

// SSE2 cosine via the x87 unit.
instruct cosXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (CosD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  ins_cost(1800);
  format %{ "DCOS   $dst" %}
  opcode(0xD9, 0xFF);
  ins_encode( Push_SrcXD(dst), OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 tangent: FPTAN pushes an extra 1.0, which the trailing fstp discards.
instruct tanD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  match(Set dst(TanD src));
  format %{ "DTAN   $dst" %}
  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8));   // fstp st
  ins_pipe( pipe_slow );
%}

// SSE2 tangent via the x87 unit.
instruct tanXD_reg(regXD dst, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(TanD dst));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DTAN   $dst" %}
  ins_encode( Push_SrcXD(dst),
              Opcode(0xD9), Opcode(0xF2),    // fptan
              Opcode(0xDD), Opcode(0xD8),   // fstp st
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10029
// x87 arctangent; "DATA" is the local mnemonic for D9 F3 = FPATAN.
instruct atanD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst(AtanD dst src));
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_Reg_D(src),
              OpcP, OpcS, RegOpc(dst) );
  ins_pipe( pipe_slow );
%}

// SSE2 arctangent via the x87 unit.
instruct atanXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst(AtanD dst src));
  effect(KILL cr); // Push_{Src|Result}XD() uses "{SUB|ADD} ESP,8"
  format %{ "DATA   $dst,$src" %}
  opcode(0xD9, 0xF3);
  ins_encode( Push_SrcXD(src),
              OpcP, OpcS, Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}

// x87 square root.  Note the opcode bytes are declared reversed and
// emitted as OpcS, OpcP, yielding D9 FA = FSQRT.
instruct sqrtD_reg(regD dst, regD src) %{
  predicate (UseSSE<=1);
  match(Set dst (SqrtD src));
  format %{ "DSQRT  $dst,$src" %}
  opcode(0xFA, 0xD9);
  ins_encode( Push_Reg_D(src),
              OpcS, OpcP, Pop_Reg_D(dst) );
  ins_pipe( pipe_slow );
%}
10060
// x87 fast-path pow: X^Y computed as 2^(Y*log2(X)) using FYL2X followed
// by the shared pow_exp_core_encoding (F2XM1 plus manual exponent
// scaling through a stack temp).  EAX/EBX/ECX are scratch for the
// exponent marshaling, hence the KILLs.
instruct powD_reg(regD X, regDPR1 Y, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE<=1);
  match(Set Y (PowD X Y));  // Raise X to the Yth power
  effect(KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "FLD_D  $X\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              Push_Reg_D(X),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              pop_stack_temp_qword);
  ins_pipe( pipe_slow );
%}
10096
// SSE2 fast-path pow: same 2^(Y*log2(X)) scheme as powD_reg, but the
// xmm operands are pushed onto the x87 stack first and the result is
// moved back to xmm via Push_ResultXD.  tmp1 (FPR1) is clobbered by the
// x87 computation.
instruct powXD_reg(regXD dst, regXD src0, regXD src1, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx ) %{
  predicate (UseSSE>=2);
  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx );
  format %{ "SUB    ESP,8\t\t# Fast-path POW encoding\n\t"
            "MOVSD  [ESP],$src1\n\t"
            "FLD    FPR1,$src1\n\t"
            "MOVSD  [ESP],$src0\n\t"
            "FLD    FPR1,$src0\n\t"
            "FYL2X  \t\t\t# Q=Y*ln2(X)\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( push_stack_temp_qword,
              push_xmm_to_fpr1(src1),
              push_xmm_to_fpr1(src0),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10138
10139
10140instruct expD_reg(regDPR1 dpr1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
10141  predicate (UseSSE<=1);
10142  match(Set dpr1 (ExpD dpr1));
10143  effect(KILL rax, KILL rbx, KILL rcx);
10144  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding"
10145            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
10146            "FMULP  \t\t\t# Q=X*log2(e)\n\t"
10147
10148            "FDUP   \t\t\t# Q Q\n\t"
10149            "FRNDINT\t\t\t# int(Q) Q\n\t"
10150            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
10151            "FISTP  dword [ESP]\n\t"
10152            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
10153            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
10154            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
10155            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
10156            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
10157            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
10158            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
10159            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
10160            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
10161            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
10162            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
10163            "MOV    [ESP+0],0\n\t"
10164            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"
10165
10166            "ADD    ESP,8"
10167             %}
10168  ins_encode( push_stack_temp_qword,
10169              Opcode(0xD9), Opcode(0xEA),   // fldl2e
10170              Opcode(0xDE), Opcode(0xC9),   // fmulp
10171              pow_exp_core_encoding,
10172              pop_stack_temp_qword);
10173  ins_pipe( pipe_slow );
10174%}
10175
// SSE2 fast-path exp: same 2^(X*log2(e)) scheme as expD_reg, operand
// pushed from xmm onto the x87 stack and the result moved back to xmm.
instruct expXD_reg(regXD dst, regXD src, regDPR1 tmp1, eAXRegI rax, eBXRegI rbx, eCXRegI rcx) %{
  predicate (UseSSE>=2);
  match(Set dst (ExpD src));
  effect(KILL tmp1, KILL rax, KILL rbx, KILL rcx);
  format %{ "SUB    ESP,8\t\t# Fast-path EXP encoding\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLDL2E \t\t\t# Ld log2(e) X\n\t"
            "FMULP  \t\t\t# Q=X*log2(e) X\n\t"

            "FDUP   \t\t\t# Q Q\n\t"
            "FRNDINT\t\t\t# int(Q) Q\n\t"
            "FSUB   ST(1),ST(0)\t# int(Q) frac(Q)\n\t"
            "FISTP  dword [ESP]\n\t"
            "F2XM1  \t\t\t# 2^frac(Q)-1 int(Q)\n\t"
            "FLD1   \t\t\t# 1 2^frac(Q)-1 int(Q)\n\t"
            "FADDP  \t\t\t# 2^frac(Q) int(Q)\n\t" // could use FADD [1.000] instead
            "MOV    EAX,[ESP]\t# Pick up int(Q)\n\t"
            "MOV    ECX,0xFFFFF800\t# Overflow mask\n\t"
            "ADD    EAX,1023\t\t# Double exponent bias\n\t"
            "MOV    EBX,EAX\t\t# Preshifted biased expo\n\t"
            "SHL    EAX,20\t\t# Shift exponent into place\n\t"
            "TEST   EBX,ECX\t\t# Check for overflow\n\t"
            "CMOVne EAX,ECX\t\t# If overflow, stuff NaN into EAX\n\t"
            "MOV    [ESP+4],EAX\t# Marshal 64-bit scaling double\n\t"
            "MOV    [ESP+0],0\n\t"
            "FMUL   ST(0),[ESP+0]\t# Scale\n\t"

            "FST_D  [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8"
             %}
  ins_encode( Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xEA),   // fldl2e
              Opcode(0xDE), Opcode(0xC9),   // fmulp
              pow_exp_core_encoding,
              Push_ResultXD(dst) );
  ins_pipe( pipe_slow );
%}
10214
10215
10216
// x87 log10: log10(x) = log10(2) * log2(x) via FLDLG2 + FYL2X.
instruct log10D_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// SSE2 log10 via the x87 unit.  No FXCH needed: Push_SrcXD loads the
// operand after the constant, so the stack order is already correct.
instruct log10XD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  match(Set dst (Log10D src));
  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_10(2) * log_2(x)
  format %{ "FLDLG2 \t\t\t#Log10\n\t"
            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));

  ins_pipe( pipe_slow );
%}

// x87 natural log: ln(x) = ln(2) * log2(x) via FLDLN2 + FYL2X.
instruct logD_reg(regDPR1 dst, regDPR1 src) %{
  predicate (UseSSE<=1);
  // The source Double operand on FPU stack
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fxch         ; swap ST(0) with ST(1)
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FXCH   \n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Opcode(0xD9), Opcode(0xC9),   // fxch
              Opcode(0xD9), Opcode(0xF1));  // fyl2x

  ins_pipe( pipe_slow );
%}

// SSE2 natural log via the x87 unit.
instruct logXD_reg(regXD dst, regXD src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  effect(KILL cr);
  // The source and result Double operands in XMM registers
  match(Set dst (LogD src));
  // fldln2       ; push log_e(2) on the FPU stack; full 80-bit number
  // fyl2x        ; compute log_e(2) * log_2(x)
  format %{ "FLDLN2 \t\t\t#Log_e\n\t"
            "FYL2X  \t\t\t# Q=Log_e*Log_2(x)"
         %}
  ins_encode( Opcode(0xD9), Opcode(0xED),   // fldln2
              Push_SrcXD(src),
              Opcode(0xD9), Opcode(0xF1),   // fyl2x
              Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}
10286
10287//-------------Float Instructions-------------------------------
10288// Float Math
10289
10290// Code for float compare:
10291//     fcompp();
10292//     fwait(); fnstsw_ax();
10293//     sahf();
10294//     movl(dst, unordered_result);
10295//     jcc(Assembler::parity, exit);
10296//     movl(dst, less_result);
10297//     jcc(Assembler::below, exit);
10298//     movl(dst, equal_result);
10299//     jcc(Assembler::equal, exit);
10300//     movl(dst, greater_result);
10301//   exit:
10302
// P6 version of float compare, sets condition codes in EFLAGS
// Uses FUCOMIP; the fixup maps the unordered (NaN) case to "less than".
// NOTE(review): the push uses the D (double) push encoding on a regF
// operand -- presumably the x87 push encoding is shared; confirm against
// the Push_Reg_D encoding class.
instruct cmpF_cc_P6(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(150);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction\n\t"
            "JNP    exit\n\t"
            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
            "SAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              cmpF_P6_fixup );
  ins_pipe( pipe_slow );
%}

// Carry-flag-only variant (eFlagsRegUCF): no NaN fixup required.
instruct cmpF_cc_P6CF(eFlagsRegUCF cr, regF src1, regF src2) %{
  predicate(VM_Version::supports_cmov() && UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  ins_cost(100);
  format %{ "FLD    $src1\n\t"
            "FUCOMIP ST,$src2  // P6 instruction" %}
  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2));
  ins_pipe( pipe_slow );
%}
10333
10334
// Compare & branch
// Pre-P6 float compare: FCOM plus FNSTSW/SAHF to move the x87 status
// word into EFLAGS; unordered (NaN) is forced to "less than".
instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2, eAXRegI rax) %{
  predicate(UseSSE == 0);
  match(Set cr (CmpF src1 src2));
  effect(KILL rax);
  ins_cost(200);
  format %{ "FLD    $src1\n\t"
            "FCOMp  $src2\n\t"
            "FNSTSW AX\n\t"
            "TEST   AX,0x400\n\t"
            "JZ,s   flags\n\t"
            "MOV    AH,1\t# unordered treat as LT\n"
    "flags:\tSAHF" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              fpu_flags);
  ins_pipe( pipe_slow );
%}

// Compare vs zero into -1,0,1
// FTST against 0.0, then CmpF_Result materializes the three-way result.
instruct cmpF_0(eRegI dst, regF src1, immF0 zero, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 zero));
  effect(KILL cr, KILL rax);
  ins_cost(280);
  format %{ "FTSTF  $dst,$src1" %}
  opcode(0xE4, 0xD9);
  ins_encode( Push_Reg_D(src1),
              OpcS, OpcP, PopFPU,
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1
instruct cmpF_reg(eRegI dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE == 0);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr, KILL rax);
  ins_cost(300);
  format %{ "FCMPF  $dst,$src1,$src2" %}
  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
  ins_encode( Push_Reg_D(src1),
              OpcP, RegOpc(src2),
              CmpF_Result(dst));
  ins_pipe( pipe_slow );
%}
10382
// float compare and set condition codes in EFLAGS by XMM regs
// COMISS (0F 2F) plus the shared NaN fixup that forces unordered to LT.
instruct cmpX_cc(eFlagsRegU cr, regX dst, regX src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  effect(KILL rax);
  ins_cost(145);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Carry-flag-only variant: bare COMISS, no NaN fixup needed.
instruct cmpX_ccCF(eFlagsRegUCF cr, regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst src));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// float compare and set condition codes in EFLAGS by XMM regs
// (memory operand folded into the COMISS)
instruct cmpX_ccmem(eFlagsRegU cr, regX dst, memory src, eAXRegI rax) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  effect(KILL rax);
  ins_cost(165);
  format %{ "COMISS $dst,$src\n"
          "\tJNP    exit\n"
          "\tMOV    ah,1       // saw a NaN, set CF\n"
          "\tSAHF\n"
     "exit:\tNOP               // avoid branch to branch" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src), cmpF_P6_fixup);
  ins_pipe( pipe_slow );
%}

// Carry-flag-only variant with a memory operand.
instruct cmpX_ccmemCF(eFlagsRegUCF cr, regX dst, memory src) %{
  predicate(UseSSE>=1);
  match(Set cr (CmpF dst (LoadF src)));
  ins_cost(100);
  format %{ "COMISS $dst,$src" %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(dst, src));
  ins_pipe( pipe_slow );
%}
10434
// Compare into -1,0,1 in XMM
// dst is zeroed before the COMISS (XOR clobbers flags, so it must come
// first); CmpX_Result then branches to -1/0/+1, with NaN mapping to -1.
instruct cmpX_reg(eRegI dst, regX src1, regX src2, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 src2));
  effect(KILL cr);
  ins_cost(255);
  format %{ "XOR    $dst,$dst\n"
          "\tCOMISS $src1,$src2\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(Xor_Reg(dst), OpcP, OpcS, RegReg(src1, src2), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}

// Compare into -1,0,1 in XMM and memory
// Zeroes dst with MOV dst,0 AFTER the compare because XOR would destroy
// the flags COMISS just produced.
instruct cmpX_regmem(eRegI dst, regX src1, memory mem, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (CmpF3 src1 (LoadF mem)));
  effect(KILL cr);
  ins_cost(275);
  format %{ "COMISS $src1,$mem\n"
          "\tMOV    $dst,0\t\t# do not blow flags\n"
          "\tJP,s   nan\n"
          "\tJEQ,s  exit\n"
          "\tJA,s   inc\n"
      "nan:\tDEC    $dst\n"
          "\tJMP,s  exit\n"
      "inc:\tINC    $dst\n"
      "exit:"
                %}
  opcode(0x0F, 0x2F);
  ins_encode(OpcP, OpcS, RegMem(src1, mem), LdImmI(dst,0x0), CmpX_Result(dst));
  ins_pipe( pipe_slow );
%}
10476
10477// Spill to obtain 24-bit precision
10478instruct subF24_reg(stackSlotF dst, regF src1, regF src2) %{
10479  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10480  match(Set dst (SubF src1 src2));
10481
10482  format %{ "FSUB   $dst,$src1 - $src2" %}
10483  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10484  ins_encode( Push_Reg_F(src1),
10485              OpcReg_F(src2),
10486              Pop_Mem_F(dst) );
10487  ins_pipe( fpu_mem_reg_reg );
10488%}
10489//
10490// This instruction does not round to 24-bits
10491instruct subF_reg(regF dst, regF src) %{
10492  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10493  match(Set dst (SubF dst src));
10494
10495  format %{ "FSUB   $dst,$src" %}
10496  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10497  ins_encode( Push_Reg_F(src),
10498              OpcP, RegOpc(dst) );
10499  ins_pipe( fpu_reg_reg );
10500%}
10501
// Spill to obtain 24-bit precision
// x87 float add; result is popped to a 32-bit stack slot to force rounding
// to 24-bit precision.
instruct addF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0); /* D8 C0+i */
  ins_encode( Push_Reg_F(src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10514//
10515// This instruction does not round to 24-bits
10516instruct addF_reg(regF dst, regF src) %{
10517  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10518  match(Set dst (AddF dst src));
10519
10520  format %{ "FLD    $src\n\t"
10521            "FADDp  $dst,ST" %}
10522  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10523  ins_encode( Push_Reg_F(src),
10524              OpcP, RegOpc(dst) );
10525  ins_pipe( fpu_reg_reg );
10526%}
10527
// Add two single precision floating point values in xmm
// Scalar SSE: dst = dst + src (ADDSS, encoding F3 0F 58).
instruct addX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst src));
  format %{ "ADDSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10536
// Scalar SSE add of a float constant; the constant is materialized in the
// constant table and referenced via LdImmX.
instruct addX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst con));
  format %{ "ADDSS  $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}
10544
// Scalar SSE add with the right operand loaded directly from memory.
instruct addX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (AddF dst (LoadF mem)));
  format %{ "ADDSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x58), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
10552
// Subtract two single precision floating point values in xmm
// Scalar SSE: dst = dst - src (SUBSS, encoding F3 0F 5C).
instruct subX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst src));
  format %{ "SUBSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10561
// Scalar SSE subtract of a float constant from the constant table.
instruct subX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst con));
  format %{ "SUBSS  $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}
10569
// Scalar SSE subtract with the right operand loaded directly from memory.
instruct subX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (SubF dst (LoadF mem)));
  format %{ "SUBSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5C), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10577
// Multiply two single precision floating point values in xmm
// Scalar SSE: dst = dst * src (MULSS, encoding F3 0F 59).
instruct mulX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst src));
  format %{ "MULSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10586
// Scalar SSE multiply by a float constant from the constant table.
instruct mulX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst con));
  format %{ "MULSS  $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}
10594
// Scalar SSE multiply with the right operand loaded directly from memory.
instruct mulX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (MulF dst (LoadF mem)));
  format %{ "MULSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x59), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10602
// Divide two single precision floating point values in xmm
// Scalar SSE: dst = dst / src (DIVSS, encoding F3 0F 5E).
instruct divX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst src));
  format %{ "DIVSS  $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10611
// Scalar SSE divide by a float constant from the constant table.
instruct divX_imm(regX dst, immXF con) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst con));
  format %{ "DIVSS  $dst,[$con]" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), LdImmX(dst, con) );
  ins_pipe( pipe_slow );
%}
10619
// Scalar SSE divide with the divisor loaded directly from memory.
instruct divX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (DivF dst (LoadF mem)));
  format %{ "DIVSS  $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x5E), RegMem(dst,mem));
  ins_pipe( pipe_slow );
%}
10627
// Get the square root of a single precision floating point value in xmm
// Matches the F->D->sqrt->F idiom so Math.sqrt on a float can be done
// entirely in single precision (SQRTSS, encoding F3 0F 51).
instruct sqrtX_reg(regX dst, regX src) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D src))));
  format %{ "SQRTSS $dst,$src" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10636
// Single-precision square root with the operand loaded directly from memory.
instruct sqrtX_mem(regX dst, memory mem) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvD2F (SqrtD (ConvF2D (LoadF mem)))));
  format %{ "SQRTSS $dst,$mem" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
10644
// Get the square root of a double precision floating point value in xmm
// SQRTSD, encoding F2 0F 51; requires SSE2.
instruct sqrtXD_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD src));
  format %{ "SQRTSD $dst,$src" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
10653
// Double-precision square root with the operand loaded directly from memory.
instruct sqrtXD_mem(regXD dst, memory mem) %{
  predicate(UseSSE>=2);
  match(Set dst (SqrtD (LoadD mem)));
  format %{ "SQRTSD $dst,$mem" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x51), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
10661
// x87 absolute value; FABS (D9 E1) operates on the top-of-stack register,
// so both dst and src are pinned to FPR1.
instruct absF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (AbsF src));
  ins_cost(100);
  format %{ "FABS" %}
  opcode(0xE1, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10671
// SSE absolute value: clear the sign bit by ANDing with the 0x7FFFFFFF mask.
instruct absX_reg(regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (AbsF dst));
  format %{ "ANDPS  $dst,[0x7FFFFFFF]\t# ABS F by sign masking" %}
  ins_encode( AbsXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
10679
// x87 negate; FCHS (D9 E0) operates on the top-of-stack register, so both
// dst and src are pinned to FPR1.
instruct negF_reg(regFPR1 dst, regFPR1 src) %{
  predicate(UseSSE==0);
  match(Set dst (NegF src));
  ins_cost(100);
  format %{ "FCHS" %}
  opcode(0xE0, 0xD9);
  ins_encode( OpcS, OpcP );
  ins_pipe( fpu_reg_reg );
%}
10689
// SSE negate: flip the sign bit by XORing with the 0x80000000 mask.
instruct negX_reg( regX dst ) %{
  predicate(UseSSE>=1);
  match(Set dst (NegF dst));
  format %{ "XORPS  $dst,[0x80000000]\t# CHS F by sign flipping" %}
  ins_encode( NegXF_encoding(dst));
  ins_pipe( pipe_slow );
%}
10697
// Cisc-alternate to addF_reg
// Spill to obtain 24-bit precision
// Loads the right operand from memory (FLD), adds the register operand,
// then pops to a 32-bit stack slot to round to 24-bit precision.
instruct addF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FLD    $src2\n\t"
            "FADD   ST,$src1\n\t"
            "FSTP_S $dst" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10713//
10714// Cisc-alternate to addF_reg
10715// This instruction does not round to 24-bits
10716instruct addF_reg_mem(regF dst, memory src) %{
10717  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10718  match(Set dst (AddF dst (LoadF src)));
10719
10720  format %{ "FADD   $dst,$src" %}
10721  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10722  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10723              OpcP, RegOpc(dst) );
10724  ins_pipe( fpu_reg_mem );
10725%}
10726
// Following two instructions for _222_mpegaudio
// Spill to obtain 24-bit precision
// Same as addF24_reg_mem but with the memory operand on the left of the
// (commutative) add; exists to improve matching for the mpegaudio benchmark.
instruct addF24_mem_reg(stackSlotF dst, regF src2, memory src1 ) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10740
// Cisc-spill variant
// Spill to obtain 24-bit precision
// Both operands come from memory (the second via cisc-spilling); the result
// is popped to a stack slot to round to 24-bit precision.
instruct addF24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 (LoadF src2)));

  format %{ "FADD   $dst,$src1,$src2 cisc" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10755
// Spill to obtain 24-bit precision
// Memory-memory add: FLD src2, FADD from src1 in memory, pop to stack slot.
instruct addF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));

  format %{ "FADD   $dst,$src1,$src2" %}
  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10769
10770
// Spill to obtain 24-bit precision
// Add a float constant (from the constant table) to a register; result is
// popped to a stack slot to round to 24-bit precision.
instruct addF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (AddF src1 src2));
  format %{ "FLD    $src1\n\t"
            "FADD   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x00);       /* D8 /0 */
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_reg_con );
%}
10784//
10785// This instruction does not round to 24-bits
10786instruct addF_reg_imm(regF dst, regF src1, immF src2) %{
10787  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10788  match(Set dst (AddF src1 src2));
10789  format %{ "FLD    $src1\n\t"
10790            "FADD   $src2\n\t"
10791            "FSTP_S $dst"  %}
10792  opcode(0xD8, 0x00);       /* D8 /0 */
10793  ins_encode( Push_Reg_F(src1),
10794              Opc_MemImm_F(src2),
10795              Pop_Reg_F(dst));
10796  ins_pipe( fpu_reg_reg_con );
10797%}
10798
// Spill to obtain 24-bit precision
// x87 float multiply; result popped to a stack slot to round to 24 bits.
instruct mulF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FLD    $src1\n\t"
            "FMUL   $src2\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10813//
10814// This instruction does not round to 24-bits
10815instruct mulF_reg(regF dst, regF src1, regF src2) %{
10816  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10817  match(Set dst (MulF src1 src2));
10818
10819  format %{ "FLD    $src1\n\t"
10820            "FMUL   $src2\n\t"
10821            "FSTP_S $dst"  %}
10822  opcode(0xD8, 0x1); /* D8 C8+i */
10823  ins_encode( Push_Reg_F(src2),
10824              OpcReg_F(src1),
10825              Pop_Reg_F(dst) );
10826  ins_pipe( fpu_reg_reg_reg );
10827%}
10828
10829
// Spill to obtain 24-bit precision
// Cisc-alternate to reg-reg multiply
// Loads the right operand from memory, multiplies by the register operand,
// and pops to a stack slot to round to 24-bit precision.
instruct mulF24_reg_mem(stackSlotF dst, regF src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 (LoadF src2)));

  format %{ "FLD_S  $src2\n\t"
            "FMUL   $src1\n\t"
            "FSTP_S $dst"  %}
  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1*/  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              OpcReg_F(src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_mem );
%}
10845//
10846// This instruction does not round to 24-bits
10847// Cisc-alternate to reg-reg multiply
10848instruct mulF_reg_mem(regF dst, regF src1, memory src2) %{
10849  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10850  match(Set dst (MulF src1 (LoadF src2)));
10851
10852  format %{ "FMUL   $dst,$src1,$src2" %}
10853  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10854  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10855              OpcReg_F(src1),
10856              Pop_Reg_F(dst) );
10857  ins_pipe( fpu_reg_reg_mem );
10858%}
10859
// Spill to obtain 24-bit precision
// Memory-memory multiply: FLD src2, FMUL from src1 in memory, pop to slot.
instruct mulF24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMUL   $dst,$src1,$src2" %}
  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
              set_instruction_start,
              OpcP, RMopc_Mem(secondary,src1),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_mem_mem );
%}
10873
// Spill to obtain 24-bit precision
// Multiply a register by a float constant; result popped to a stack slot
// to round to 24-bit precision.
instruct mulF24_reg_imm(stackSlotF dst, regF src1, immF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (MulF src1 src2));

  format %{ "FMULc $dst,$src1,$src2" %}
  opcode(0xD8, 0x1);  /* D8 /1*/
  ins_encode( Push_Reg_F(src1),
              Opc_MemImm_F(src2),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_reg_con );
%}
10886//
10887// This instruction does not round to 24-bits
10888instruct mulF_reg_imm(regF dst, regF src1, immF src2) %{
10889  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10890  match(Set dst (MulF src1 src2));
10891
10892  format %{ "FMULc $dst. $src1, $src2" %}
10893  opcode(0xD8, 0x1);  /* D8 /1*/
10894  ins_encode( Push_Reg_F(src1),
10895              Opc_MemImm_F(src2),
10896              Pop_Reg_F(dst));
10897  ins_pipe( fpu_reg_reg_con );
10898%}
10899
10900
10901//
10902// MACRO1 -- subsume unshared load into mulF
10903// This instruction does not round to 24-bits
10904instruct mulF_reg_load1(regF dst, regF src, memory mem1 ) %{
10905  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10906  match(Set dst (MulF (LoadF mem1) src));
10907
10908  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10909            "FMUL   ST,$src\n\t"
10910            "FSTP   $dst" %}
10911  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10912  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10913              OpcReg_F(src),
10914              Pop_Reg_F(dst) );
10915  ins_pipe( fpu_reg_reg_mem );
10916%}
10917//
10918// MACRO2 -- addF a mulF which subsumed an unshared load
10919// This instruction does not round to 24-bits
10920instruct addF_mulF_reg_load1(regF dst, memory mem1, regF src1, regF src2) %{
10921  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10922  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10923  ins_cost(95);
10924
10925  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10926            "FMUL   ST,$src1  subsume mulF left load\n\t"
10927            "FADD   ST,$src2\n\t"
10928            "FSTP   $dst" %}
10929  opcode(0xD9); /* LoadF D9 /0 */
10930  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10931              FMul_ST_reg(src1),
10932              FAdd_ST_reg(src2),
10933              Pop_Reg_F(dst) );
10934  ins_pipe( fpu_reg_mem_reg_reg );
10935%}
10936
// MACRO3 -- addF a mulF
// This instruction does not round to 24-bits.  It is a '2-address'
// instruction in that the result goes back to src2.  This eliminates
// a move from the macro; possibly the register allocator will have
// to add it back (and maybe not).
instruct addF_mulF_reg(regF src2, regF src1, regF src0) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set src2 (AddF (MulF src0 src1) src2));

  format %{ "FLD    $src0     ===MACRO3===\n\t"
            "FMUL   ST,$src1\n\t"
            "FADDP  $src2,ST" %}
  opcode(0xD9); /* LoadF D9 /0 */
  ins_encode( Push_Reg_F(src0),
              FMul_ST_reg(src1),
              FAddP_reg_ST(src2) );
  ins_pipe( fpu_reg_reg_reg );
%}
10955
// MACRO4 -- divF subF
// This instruction does not round to 24-bits
// Fused subtract+divide: dst = (src2 - src1) / src3 on the FPU stack.
instruct subF_divF_reg(regF dst, regF src1, regF src2, regF src3) %{
  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (DivF (SubF src2 src1) src3));

  format %{ "FLD    $src2   ===MACRO4===\n\t"
            "FSUB   ST,$src1\n\t"
            "FDIV   ST,$src3\n\t"
            "FSTP  $dst" %}
  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
  ins_encode( Push_Reg_F(src2),
              subF_divF_encode(src1,src3),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_reg_reg_reg );
%}
10972
// Spill to obtain 24-bit precision
// x87 float divide; result popped to a stack slot to round to 24 bits.
instruct divF24_reg(stackSlotF dst, regF src1, regF src2) %{
  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (DivF src1 src2));

  format %{ "FDIV   $dst,$src1,$src2" %}
  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
  ins_encode( Push_Reg_F(src1),
              OpcReg_F(src2),
              Pop_Mem_F(dst) );
  ins_pipe( fpu_mem_reg_reg );
%}
10985//
10986// This instruction does not round to 24-bits
10987instruct divF_reg(regF dst, regF src) %{
10988  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10989  match(Set dst (DivF dst src));
10990
10991  format %{ "FDIV   $dst,$src" %}
10992  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10993  ins_encode( Push_Reg_F(src),
10994              OpcP, RegOpc(dst) );
10995  ins_pipe( fpu_reg_reg );
10996%}
10997
10998
// Spill to obtain 24-bit precision
// Float remainder via the shared FPREM helper (emitModD); the result is
// popped to a stack slot to round to 24-bit precision.
instruct modF24_reg(stackSlotF dst, regF src1, regF src2, eAXRegI rax, eFlagsReg cr) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ModF src1 src2));
  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS

  format %{ "FMOD   $dst,$src1,$src2" %}
  ins_encode( Push_Reg_Mod_D(src1, src2),
              emitModD(),
              Push_Result_Mod_D(src2),
              Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}
11012//
11013// This instruction does not round to 24-bits
11014instruct modF_reg(regF dst, regF src, eAXRegI rax, eFlagsReg cr) %{
11015  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11016  match(Set dst (ModF dst src));
11017  effect(KILL rax, KILL cr); // emitModD() uses EAX and EFLAGS
11018
11019  format %{ "FMOD   $dst,$src" %}
11020  ins_encode(Push_Reg_Mod_D(dst, src),
11021              emitModD(),
11022              Push_Result_Mod_D(src),
11023              Pop_Reg_F(dst));
11024  ins_pipe( pipe_slow );
11025%}
11026
// SSE float remainder: there is no SSE remainder instruction, so the
// operands are bounced through the stack to the x87 FPREM loop and the
// result is moved back into an XMM register.
instruct modX_reg(regX dst, regX src0, regX src1, eAXRegI rax, eFlagsReg cr) %{
  predicate(UseSSE>=1);
  match(Set dst (ModF src0 src1));
  effect(KILL rax, KILL cr);
  format %{ "SUB    ESP,4\t # FMOD\n"
          "\tMOVSS  [ESP+0],$src1\n"
          "\tFLD_S  [ESP+0]\n"
          "\tMOVSS  [ESP+0],$src0\n"
          "\tFLD_S  [ESP+0]\n"
     "loop:\tFPREM\n"
          "\tFWAIT\n"
          "\tFNSTSW AX\n"
          "\tSAHF\n"
          "\tJP     loop\n"
          "\tFSTP_S [ESP+0]\n"
          "\tMOVSS  $dst,[ESP+0]\n"
          "\tADD    ESP,4\n"
          "\tFSTP   ST0\t # Restore FPU Stack"
    %}
  ins_cost(250);
  ins_encode( Push_ModX_encoding(src0, src1), emitModD(), Push_ResultX(dst,0x4), PopFPU);
  ins_pipe( pipe_slow );
%}
11050
11051
11052//----------Arithmetic Conversion Instructions---------------------------------
11053// The conversions operations are all Alpha sorted.  Please keep it that way!
11054
// Round an FPU register to float precision by storing it to a 32-bit
// stack slot (FST_S performs the rounding).
instruct roundFloat_mem_reg(stackSlotF dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (RoundFloat src));
  ins_cost(125);
  format %{ "FST_S  $dst,$src\t# F-round" %}
  ins_encode( Pop_Mem_Reg_F(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11063
// Round an FPU register to double precision by storing it to a 64-bit
// stack slot (FST_D performs the rounding).
instruct roundDouble_mem_reg(stackSlotD dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (RoundDouble src));
  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# D-round" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}
11072
// Force rounding to 24-bit precision and 6-bit exponent
// D->F conversion on the pure x87 path; expands to the float-rounding
// store instruction above.
instruct convD2F_reg(stackSlotF dst, regD src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvD2F src));
  format %{ "FST_S  $dst,$src\t# F-round" %}
  expand %{
    roundFloat_mem_reg(dst,src);
  %}
%}
11082
// Force rounding to 24-bit precision and 6-bit exponent
// UseSSE==1: double lives in an x87 register but the float result must be
// in XMM, so round through a 4-byte stack temp.
instruct convD2X_reg(regX dst, regD src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvD2F src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "FST_S  [ESP],$src\t# F-round\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD ESP,4" %}
  ins_encode( D2X_encoding(dst, src) );
  ins_pipe( pipe_slow );
%}
11095
// Force rounding double precision to single precision
// SSE2 direct conversion: CVTSD2SS (F2 0F 5A).
instruct convXD2X_reg(regX dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2F src));
  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
  opcode(0xF2, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11105
// F->D conversion on the pure x87 path; on the FPU stack a float is already
// held in extended precision, so this is just a register-to-register copy.
// NOTE(review): the format string says "FST_S" although this is an F->D
// widening (encoded via Pop_Reg_Reg_D) — appears to be display-only.
instruct convF2D_reg_reg(regD dst, regF src) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2D src));
  format %{ "FST_S  $dst,$src\t# D-round" %}
  ins_encode( Pop_Reg_Reg_D(dst, src));
  ins_pipe( fpu_reg_reg );
%}
11113
// F->D conversion with the result rounded to a 64-bit stack slot
// (UseSSE==1: floats are in XMM, doubles still on the x87 stack).
instruct convF2D_reg(stackSlotD dst, regF src) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  format %{ "FST_D  $dst,$src\t# D-round" %}
  expand %{
    roundDouble_mem_reg(dst,src);
  %}
%}
11122
// F->D for UseSSE==1: float is in XMM, double result must land on the x87
// stack, so the value is bounced through a 4-byte stack temp.
instruct convX2D_reg(regD dst, regX src, eFlagsReg cr) %{
  predicate(UseSSE==1);
  match(Set dst (ConvF2D src));
  effect( KILL cr );
  format %{ "SUB    ESP,4\n\t"
            "MOVSS  [ESP] $src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "FSTP   $dst\t# D-round" %}
  ins_encode( X2D_encoding(dst, src), Pop_Reg_D(dst));
  ins_pipe( pipe_slow );
%}
11135
// SSE2 direct F->D conversion: CVTSS2SD (F3 0F 5A).
instruct convX2XD_reg(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvF2D src));
  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
  opcode(0xF3, 0x0F, 0x5A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11144
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// x87 path: truncating FIST via a control-word switch; the 0x80000000
// sentinel means overflow/NaN, in which case the d2i_wrapper stub applies
// Java's corner-case semantics.
instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert double to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD_D  $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
11164
// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
// SSE2 path: CVTTSD2SI truncates directly; 0x80000000 signals overflow/NaN
// and falls back to the d2i_wrapper stub.
instruct convXD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regXD src, eFlagsReg cr ) %{
  predicate(UseSSE>=2);
  match(Set dst (ConvD2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSD2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 8\n\t"
            "MOVSD  [ESP], $src\n\t"
            "FLD_D  [ESP]\n\t"
            "ADD    ESP, 8\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x1); // double-precision conversion
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11183
// Convert a double to a long (x87 path): truncating 64-bit FIST; the
// EDX:EAX pair 0x80000000:0 signals overflow/NaN and falls back to the
// d2l_wrapper stub for Java semantics.
instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
  predicate(UseSSE<=1);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert double to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( Push_Reg_D(src),  D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11205
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The XMM double is spilled to the stack, converted with a truncating
// 64-bit FIST, and overflow/NaN (EDX:EAX == 0x80000000:0) goes to the
// d2l_wrapper stub.
instruct convXD2L_reg_reg( eADXRegL dst, regXD src, eFlagsReg cr ) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvD2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,8\n\t"
            "MOVSD  [ESP],$src\n\t"
            "FLD_D  [ESP]\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( XD2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11231
// Convert a float to an int.  Java semantics require we do complex
// manglations in the corner cases.  So we set the rounding mode to
// 'zero', store the darned float down as an int, and reset the
// rounding mode to 'nearest'.  The hardware stores a flag value down
// if we would overflow or converted a NAN; we check for this
// and go the slow path if needed.
instruct convF2I_reg_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "FLD    $src\t# Convert float to int \n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,4\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "CMP    EAX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  // D2I_encoding works for F2I
  ins_encode( Push_Reg_F(src), D2I_encoding(src) );
  ins_pipe( pipe_slow );
%}
11257
// Convert a float in xmm to an int reg.
// CVTTSS2SI truncates directly; 0x80000000 signals overflow/NaN and falls
// back to the d2i_wrapper stub for Java corner-case semantics.
instruct convX2I_reg(eAXRegI dst, eDXRegI tmp, regX src, eFlagsReg cr ) %{
  predicate(UseSSE>=1);
  match(Set dst (ConvF2I src));
  effect( KILL tmp, KILL cr );
  format %{ "CVTTSS2SI $dst, $src\n\t"
            "CMP    $dst,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP, 4\n\t"
            "MOVSS  [ESP], $src\n\t"
            "FLD    [ESP]\n\t"
            "ADD    ESP, 4\n\t"
            "CALL   d2i_wrapper\n"
      "fast:" %}
  opcode(0x0); // single-precision conversion
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x2C), FX2I_encoding(src,dst));
  ins_pipe( pipe_slow );
%}
11276
// Convert a float to a long (x87 path): truncating 64-bit FIST; the
// EDX:EAX pair 0x80000000:0 signals overflow/NaN and falls back to the
// d2l_wrapper stub.
instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
  predicate(UseSSE==0);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "FLD    $src\t# Convert float to long\n\t"
            "FLDCW  trunc mode\n\t"
            "SUB    ESP,8\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "FLD    $src\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  // D2L_encoding works for F2L
  ins_encode( Push_Reg_F(src), D2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11299
// XMM lacks a float/double->long conversion, so use the old FPU stack.
// The XMM float is spilled to the stack, converted with a truncating
// 64-bit FIST, and overflow/NaN (EDX:EAX == 0x80000000:0) goes to the
// d2l_wrapper stub.
instruct convX2L_reg_reg( eADXRegL dst, regX src, eFlagsReg cr ) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvF2L src));
  effect( KILL cr );
  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "FLDCW  trunc mode\n\t"
            "FISTp  [ESP + #0]\n\t"
            "FLDCW  std/24-bit mode\n\t"
            "POP    EAX\n\t"
            "POP    EDX\n\t"
            "CMP    EDX,0x80000000\n\t"
            "JNE,s  fast\n\t"
            "TEST   EAX,EAX\n\t"
            "JNE,s  fast\n\t"
            "SUB    ESP,4\t# Convert float to long\n\t"
            "MOVSS  [ESP],$src\n\t"
            "FLD_S  [ESP]\n\t"
            "ADD    ESP,4\n\t"
            "CALL   d2l_wrapper\n"
      "fast:" %}
  ins_encode( X2L_encoding(src) );
  ins_pipe( pipe_slow );
%}
11326
// Convert an int (already spilled to a stack slot) to a double with the
// x87 FILD instruction.
instruct convI2D_reg(regD dst, stackSlotI src) %{
  predicate( UseSSE<=1 );
  match(Set dst (ConvI2D src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_D(dst));
  ins_pipe( fpu_reg_mem );
%}
11336
// SSE2 int->double: CVTSI2SD (F2 0F 2A); used unless the UseXmmI2D
// alternative (MOVD + CVTDQ2PD) is selected.
instruct convI2XD_reg(regXD dst, eRegI src) %{
  predicate( UseSSE>=2 && !UseXmmI2D );
  match(Set dst (ConvI2D src));
  format %{ "CVTSI2SD $dst,$src" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}
11345
// SSE2 int->double with the int operand loaded directly from memory.
instruct convI2XD_mem(regXD dst, memory mem) %{
  predicate( UseSSE>=2 );
  match(Set dst (ConvI2D (LoadI mem)));
  format %{ "CVTSI2SD $dst,$mem" %}
  opcode(0xF2, 0x0F, 0x2A);
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegMem(dst, mem));
  ins_pipe( pipe_slow );
%}
11354
11355instruct convXI2XD_reg(regXD dst, eRegI src)
11356%{
11357  predicate( UseSSE>=2 && UseXmmI2D );
11358  match(Set dst (ConvI2D src));
11359
11360  format %{ "MOVD  $dst,$src\n\t"
11361            "CVTDQ2PD $dst,$dst\t# i2d" %}
11362  ins_encode %{
11363    __ movdl($dst$$XMMRegister, $src$$Register);
11364    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
11365  %}
11366  ins_pipe(pipe_slow); // XXX
11367%}
11368
11369instruct convI2D_mem(regD dst, memory mem) %{
11370  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
11371  match(Set dst (ConvI2D (LoadI mem)));
11372  format %{ "FILD   $mem\n\t"
11373            "FSTP   $dst" %}
11374  opcode(0xDB);      /* DB /0 */
11375  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11376              Pop_Reg_D(dst));
11377  ins_pipe( fpu_reg_mem );
11378%}
11379
// Convert a byte to a float; no rounding step needed.  The predicate
// recognizes ConvI2F of (AndI x 255): such a value always fits exactly
// in a float mantissa, so the 24-bit rounding store can be skipped.
instruct conv24I2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}

  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode(Push_Mem_I(src), Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out
// (FSTP_S to a stack slot rather than leaving the value in a register).
instruct convI2F_SSF(stackSlotF dst, stackSlotI src) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  ins_cost(200);
  format %{ "FILD   $src\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// In 24-bit mode, force exponent rounding by storing back out.
// Memory form: FILD directly from the loaded int's address.
instruct convI2F_SSF_mem(stackSlotF dst, memory mem) %{
  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  ins_cost(200);
  format %{ "FILD   $mem\n\t"
            "FSTP_S $dst" %}
  opcode(0xDB);  /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Mem_F(dst));
  ins_pipe( fpu_mem_mem );
%}

// This instruction does not round to 24-bits, so it is only used
// outside 24-bit rounding mode.
instruct convI2F_reg(regF dst, stackSlotI src) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F src));
  format %{ "FILD   $src\n\t"
            "FSTP   $dst" %}
  opcode(0xDB, 0x0);  /* DB /0 */
  ins_encode( Push_Mem_I(src),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}

// This instruction does not round to 24-bits.  Memory form of the above.
instruct convI2F_mem(regF dst, memory mem) %{
  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
  match(Set dst (ConvI2F (LoadI mem)));
  format %{ "FILD   $mem\n\t"
            "FSTP   $dst" %}
  opcode(0xDB);      /* DB /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,mem),
              Pop_Reg_F(dst));
  ins_pipe( fpu_reg_mem );
%}
11441
// Convert an int to a float in xmm; no rounding step needed (CVTSI2SS).
instruct convI2X_reg(regX dst, eRegI src) %{
  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
  match(Set dst (ConvI2F src));
  format %{ "CVTSI2SS $dst, $src" %}

  opcode(0xF3, 0x0F, 0x2A);  /* F3 0F 2A /r */
  ins_encode( OpcP, OpcS, Opcode(tertiary), RegReg(dst, src));
  ins_pipe( pipe_slow );
%}

// Alternate i2f selected by -XX:+UseXmmI2F: MOVD the int into the XMM
// register, then CVTDQ2PS in place (instead of CVTSI2SS).
 instruct convXI2X_reg(regX dst, eRegI src)
%{
  predicate( UseSSE>=2 && UseXmmI2F );
  match(Set dst (ConvI2F src));

  format %{ "MOVD  $dst,$src\n\t"
            "CVTDQ2PS $dst,$dst\t# i2f" %}
  ins_encode %{
    __ movdl($dst$$XMMRegister, $src$$Register);
    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
  %}
  ins_pipe(pipe_slow); // XXX
%}
11466
// Sign-extend convert int to long: copy into both halves, then
// arithmetic-shift the high half by 31 to replicate the sign bit.
instruct convI2L_reg( eRegL dst, eRegI src, eFlagsReg cr) %{
  match(Set dst (ConvI2L src));
  effect(KILL cr);   // SAR sets the flags
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src\n\t"
            "SAR    $dst.hi,31" %}
  ins_encode(convert_int_long(dst,src));
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend convert int to long: matches (AndL (ConvI2L src) 0xFFFFFFFF)
// so the sign-extension can be replaced by clearing the high half.
instruct convI2L_reg_zex(eRegL dst, eRegI src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL (ConvI2L src) mask) );
  effect( KILL flags );   // XOR sets the flags
  format %{ "MOV    $dst.lo,$src\n\t"
            "XOR    $dst.hi,$dst.hi" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}

// Zero-extend long: (AndL src 0xFFFFFFFF) keeps the low half and
// clears the high half with XOR.
instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
  match(Set dst (AndL src mask) );
  effect( KILL flags );
  format %{ "MOV    $dst.lo,$src.lo\n\t"
            "XOR    $dst.hi,$dst.hi\n\t" %}
  opcode(0x33); // XOR
  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
  ins_pipe( ialu_reg_reg_long );
%}
11498
// Convert long to double via the x87 FPU: push both halves, FILD the
// 64-bit integer, and store the D-rounded result to the stack slot.
instruct convL2D_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE<=1);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_D $dst\t# D-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_D(dst));
  ins_pipe( pipe_slow );
%}

// Long to double with an XMM destination: still goes through the x87
// unit (SSE2 has no 64-bit-int to double instruction on ia32), then
// bounces the result through the stack into the XMM register.
instruct convL2XD_reg( regXD dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=2);
  match(Set dst (ConvL2D src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_D [ESP]\n\t"
            "MOVSD  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultXD(dst));
  ins_pipe( pipe_slow );
%}

// Long to single float with an XMM destination; same x87 round-trip as
// convL2XD_reg but storing with FSTP_S for F-rounding.
instruct convL2X_reg( regX dst, eRegL src, eFlagsReg cr) %{
  predicate (UseSSE>=1);
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD_D [ESP]\n\t"
            "FSTP_S [ESP]\n\t"
            "MOVSS  $dst,[ESP]\n\t"
            "ADD    ESP,8" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double2(src), Push_ResultX(dst,0x8));
  ins_pipe( pipe_slow );
%}

// Long to single float with a stack-slot destination (pure FPU path).
instruct convL2F_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
  match(Set dst (ConvL2F src));
  effect( KILL cr );
  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
            "PUSH   $src.lo\n\t"
            "FILD   ST,[ESP + #0]\n\t"
            "ADD    ESP,8\n\t"
            "FSTP_S $dst\t# F-round" %}
  opcode(0xDF, 0x5);  /* DF /5 */
  ins_encode(convert_long_double(src), Pop_Mem_F(dst));
  ins_pipe( pipe_slow );
%}

// Convert long to int: simply copy the low half.
instruct convL2I_reg( eRegI dst, eRegL src ) %{
  match(Set dst (ConvL2I src));
  effect( DEF dst, USE src );
  format %{ "MOV    $dst,$src.lo" %}
  ins_encode(enc_CopyL_Lo(dst,src));
  ins_pipe( ialu_reg_reg );
%}
11563
11564
11565instruct MoveF2I_stack_reg(eRegI dst, stackSlotF src) %{
11566  match(Set dst (MoveF2I src));
11567  effect( DEF dst, USE src );
11568  ins_cost(100);
11569  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11570  opcode(0x8B);
11571  ins_encode( OpcP, RegMem(dst,src));
11572  ins_pipe( ialu_reg_mem );
11573%}
11574
11575instruct MoveF2I_reg_stack(stackSlotI dst, regF src) %{
11576  predicate(UseSSE==0);
11577  match(Set dst (MoveF2I src));
11578  effect( DEF dst, USE src );
11579
11580  ins_cost(125);
11581  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11582  ins_encode( Pop_Mem_Reg_F(dst, src) );
11583  ins_pipe( fpu_mem_reg );
11584%}
11585
11586instruct MoveF2I_reg_stack_sse(stackSlotI dst, regX src) %{
11587  predicate(UseSSE>=1);
11588  match(Set dst (MoveF2I src));
11589  effect( DEF dst, USE src );
11590
11591  ins_cost(95);
11592  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11593  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x11), RegMem(src, dst));
11594  ins_pipe( pipe_slow );
11595%}
11596
11597instruct MoveF2I_reg_reg_sse(eRegI dst, regX src) %{
11598  predicate(UseSSE>=2);
11599  match(Set dst (MoveF2I src));
11600  effect( DEF dst, USE src );
11601  ins_cost(85);
11602  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11603  ins_encode( MovX2I_reg(dst, src));
11604  ins_pipe( pipe_slow );
11605%}
11606
// MoveI2F: reinterpret int bits as float (no value conversion).
// GPR source to float stack slot: a plain 32-bit store.
instruct MoveI2F_reg_stack(stackSlotF dst, eRegI src) %{
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(100);
  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
  opcode(0x89);
  ins_encode( OpcPRegSS( dst, src ) );
  ins_pipe( ialu_mem_reg );
%}


// Stack slot to FPU register: reload the raw bits with FLD_S.
instruct MoveI2F_stack_reg(regF dst, stackSlotI src) %{
  predicate(UseSSE==0);
  match(Set dst (MoveI2F src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FLD_S  $src\n\t"
            "FSTP   $dst\t# MoveI2F_stack_reg" %}
  opcode(0xD9);               /* D9 /0, FLD m32real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_F(dst) );
  ins_pipe( fpu_reg_mem );
%}

// Stack slot to XMM register: MOVSS load of the raw bits.
instruct MoveI2F_stack_reg_sse(regX dst, stackSlotI src) %{
  predicate(UseSSE>=1);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(95);
  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
  ins_encode( Opcode(0xF3), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// GPR to XMM directly with MOVD (SSE2); cheapest form, no memory trip.
instruct MoveI2F_reg_reg_sse(regX dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveI2F src));
  effect( DEF dst, USE src );

  ins_cost(85);
  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
  ins_encode( MovI2X_reg(dst, src) );
  ins_pipe( pipe_slow );
%}
11654
// MoveD2L: reinterpret double bits as long (no value conversion).
// Stack-slot source: two 32-bit integer loads into the register pair.
instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(250);
  format %{ "MOV    $dst.lo,$src\n\t"
            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
  opcode(0x8B, 0x8B);
  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
  ins_pipe( ialu_mem_long_reg );
%}

// FPU register source: spill the double to the destination stack slot.
instruct MoveD2L_reg_stack(stackSlotL dst, regD src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);

  ins_cost(125);
  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
  ins_encode( Pop_Mem_Reg_D(dst, src) );
  ins_pipe( fpu_mem_reg );
%}

// XMM register source: MOVSD the raw bits to the stack slot.
instruct MoveD2L_reg_stack_sse(stackSlotL dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src);
  ins_cost(95);

  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x11), RegMem(src,dst));
  ins_pipe( pipe_slow );
%}

// XMM to GPR pair without touching memory: MOVD the low word, shuffle
// the high word down into a temp XMM register, MOVD that too.
instruct MoveD2L_reg_reg_sse(eRegL dst, regXD src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveD2L src));
  effect(DEF dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst.lo,$src\n\t"
            "PSHUFLW $tmp,$src,0x4E\n\t"
            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
  ins_encode( MovXD2L_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
11700
// MoveL2D: reinterpret long bits as double (no value conversion).
// GPR pair to double stack slot: two 32-bit integer stores.
instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(200);
  format %{ "MOV    $dst,$src.lo\n\t"
            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
  opcode(0x89, 0x89);
  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
  ins_pipe( ialu_mem_long_reg );
%}


// Stack slot to FPU register: reload the raw bits with FLD_D.
instruct MoveL2D_stack_reg(regD dst, stackSlotL src) %{
  predicate(UseSSE<=1);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);
  ins_cost(125);

  format %{ "FLD_D  $src\n\t"
            "FSTP   $dst\t# MoveL2D_stack_reg" %}
  opcode(0xDD);               /* DD /0, FLD m64real */
  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
              Pop_Reg_D(dst) );
  ins_pipe( fpu_reg_mem );
%}


// Stack slot to XMM register: MOVSD load (also clears the upper half;
// only used when UseXmmLoadAndClearUpper says that is the fast choice).
instruct MoveL2D_stack_reg_sse(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0xF2), Opcode(0x0F), Opcode(0x10), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// Same as above but with MOVLPD, which leaves the upper half of the
// XMM register untouched; chosen when MOVSD's clear is the slow option.
instruct MoveL2D_stack_reg_sse_partial(regXD dst, stackSlotL src) %{
  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
  match(Set dst (MoveL2D src));
  effect(DEF dst, USE src);

  ins_cost(95);
  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
  ins_encode( Opcode(0x66), Opcode(0x0F), Opcode(0x12), RegMem(dst,src));
  ins_pipe( pipe_slow );
%}

// GPR pair to XMM without touching memory: MOVD each half into an XMM
// register and interleave them with PUNPCKLDQ.
instruct MoveL2D_reg_reg_sse(regXD dst, eRegL src, regXD tmp) %{
  predicate(UseSSE>=2);
  match(Set dst (MoveL2D src));
  effect(TEMP dst, USE src, TEMP tmp);
  ins_cost(85);
  format %{ "MOVD   $dst,$src.lo\n\t"
            "MOVD   $tmp,$src.hi\n\t"
            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
  ins_encode( MovL2XD_reg(dst, src, tmp) );
  ins_pipe( pipe_slow );
%}
11762
// Replicate scalar to packed byte (1 byte) values in xmm
instruct Repl8B_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVDQA  $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( pshufd_8x8(dst, src));
  ins_pipe( pipe_slow );
%}

// Replicate scalar to packed byte (1 byte) values in xmm,
// starting from a general-purpose register (MOVD first).
instruct Repl8B_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B src));
  format %{ "MOVD    $dst,$src\n\t"
            "PUNPCKLBW $dst,$dst\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate8B" %}
  ins_encode( mov_i2x(dst, src), pshufd_8x8(dst, dst));
  ins_pipe( pipe_slow );
%}

// Replicate scalar zero to packed byte (1 byte) values in xmm;
// a single PXOR clears the whole register.
instruct Repl8B_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate8B zero));
  format %{ "PXOR  $dst,$dst\t! replicate8B" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11793
// Replicate scalar to packed short (2 byte) values in xmm
instruct Repl4S_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4S" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed short (2 byte) values in xmm,
// starting from a general-purpose register (MOVD first).
instruct Repl4S_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4S" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed short (2 byte) values in xmm
instruct Repl4S_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4S zero));
  format %{ "PXOR  $dst,$dst\t! replicate4S" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11821
// Replicate scalar to packed char (2 byte) values in xmm
instruct Repl4C_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "PSHUFLW $dst,$src,0x00\t! replicate4C" %}
  ins_encode( pshufd_4x16(dst, src));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed char (2 byte) values in xmm,
// starting from a general-purpose register (MOVD first).
instruct Repl4C_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C src));
  format %{ "MOVD    $dst,$src\n\t"
            "PSHUFLW $dst,$dst,0x00\t! replicate4C" %}
  ins_encode( mov_i2x(dst, src), pshufd_4x16(dst, dst));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed char (2 byte) values in xmm
instruct Repl4C_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate4C zero));
  format %{ "PXOR  $dst,$dst\t! replicate4C" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11849
// Replicate scalar to packed integer (4 byte) values in xmm
instruct Repl2I_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "PSHUFD $dst,$src,0x00\t! replicate2I" %}
  ins_encode( pshufd(dst, src, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed integer (4 byte) values in xmm,
// starting from a general-purpose register (MOVD first).
instruct Repl2I_eRegI(regXD dst, eRegI src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I src));
  format %{ "MOVD   $dst,$src\n\t"
            "PSHUFD $dst,$dst,0x00\t! replicate2I" %}
  ins_encode( mov_i2x(dst, src), pshufd(dst, dst, 0x00));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed integer (4 byte) values in xmm
instruct Repl2I_immI0(regXD dst, immI0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2I zero));
  format %{ "PXOR  $dst,$dst\t! replicate2I" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11877
// Replicate scalar to packed single precision floating point values in xmm
instruct Repl2F_reg(regXD dst, regXD src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar to packed single precision floating point values in xmm,
// taking the scalar from a single-float XMM register.
instruct Repl2F_regX(regXD dst, regX src) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F src));
  format %{ "PSHUFD $dst,$src,0xe0\t! replicate2F" %}
  ins_encode( pshufd(dst, src, 0xe0));
  ins_pipe( fpu_reg_reg );
%}

// Replicate scalar zero to packed single precision floating point
// values in xmm; a single PXOR clears the whole register.
instruct Repl2F_immXF0(regXD dst, immXF0 zero) %{
  predicate(UseSSE>=2);
  match(Set dst (Replicate2F zero));
  format %{ "PXOR  $dst,$dst\t! replicate2F" %}
  ins_encode( pxor(dst, dst));
  ins_pipe( fpu_reg_reg );
%}
11904
11905
11906
11907// =======================================================================
11908// fast clearing of an array
11909
11910instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11911  match(Set dummy (ClearArray cnt base));
11912  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11913  format %{ "SHL    ECX,1\t# Convert doublewords to words\n\t"
11914            "XOR    EAX,EAX\n\t"
11915            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11916  opcode(0,0x4);
11917  ins_encode( Opcode(0xD1), RegOpc(ECX),
11918              OpcRegReg(0x33,EAX,EAX),
11919              Opcode(0xF3), Opcode(0xAB) );
11920  ins_pipe( pipe_slow );
11921%}
11922
// Compare two Strings; result in ECX.  The fixed register assignments
// (EDI/ESI inputs, EAX/EBX temps) are required by enc_String_Compare.
instruct string_compare(eDIRegP str1, eSIRegP str2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
  match(Set result (StrComp str1 str2));
  effect(USE_KILL str1, USE_KILL str2, KILL tmp1, KILL tmp2, KILL cr);
  //ins_cost(300);

  format %{ "String Compare $str1,$str2 -> $result    // KILL EAX, EBX" %}
  ins_encode( enc_String_Compare() );
  ins_pipe( pipe_slow );
%}

// fast array equals
instruct array_equals(eDIRegP ary1, eSIRegP ary2, eAXRegI tmp1, eBXRegI tmp2, eCXRegI result, eFlagsReg cr) %{
  match(Set result (AryEq ary1 ary2));
  effect(USE_KILL ary1, USE_KILL ary2, KILL tmp1, KILL tmp2, KILL cr);
  //ins_cost(300);

  format %{ "Array Equals $ary1,$ary2 -> $result    // KILL EAX, EBX" %}
  ins_encode( enc_Array_Equals(ary1, ary2, tmp1, tmp2, result) );
  ins_pipe( pipe_slow );
%}
11943
//----------Control Flow Instructions------------------------------------------
// Signed compare Instructions

// Compare two int registers; produces signed condition codes.
instruct compI_eReg(eFlagsReg cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1, USE op2 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Compare int register with immediate; Con8or32 presumably emits the
// short sign-extended-imm8 form when the constant fits in a byte.
instruct compI_eReg_imm(eFlagsReg cr, eRegI op1, immI op2) %{
  match(Set cr (CmpI op1 op2));
  effect( DEF cr, USE op1 );
  format %{ "CMP    $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of cmpI_eReg: compare register with memory.
instruct compI_eReg_mem(eFlagsReg cr, eRegI op1, memory op2) %{
  match(Set cr (CmpI op1 (LoadI op2)));

  format %{ "CMP    $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// Compare against zero: TEST reg,reg avoids an immediate operand.
instruct testI_reg( eFlagsReg cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpI src zero));
  effect( DEF cr, USE src );

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src con) compared with zero folds into a single TEST reg,imm.
instruct testI_reg_imm( eFlagsReg cr, eRegI src, immI con, immI0 zero ) %{
  match(Set cr (CmpI (AndI src con) zero));

  format %{ "TEST   $src,$con" %}
  opcode(0xF7,0x00);
  ins_encode( OpcP, RegOpc(src), Con32(con) );
  ins_pipe( ialu_cr_reg_imm );
%}

// (AndI src mem) compared with zero folds into a single TEST reg,mem.
instruct testI_reg_mem( eFlagsReg cr, eRegI src, memory mem, immI0 zero ) %{
  match(Set cr (CmpI (AndI src mem) zero));

  format %{ "TEST   $src,$mem" %}
  opcode(0x85);
  ins_encode( OpcP, RegMem( src, mem ) );
  ins_pipe( ialu_cr_reg_mem );
%}
12003
// Unsigned compare Instructions; really, same as signed except they
// produce an eFlagsRegU instead of eFlagsReg.

// Compare two int registers for an unsigned branch.
instruct compU_eReg(eFlagsRegU cr, eRegI op1, eRegI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Unsigned compare of register with immediate.
instruct compU_eReg_imm(eFlagsRegU cr, eRegI op1, immI op2) %{
  match(Set cr (CmpU op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpU_eReg
instruct compU_eReg_mem(eFlagsRegU cr, eRegI op1, memory op2) %{
  match(Set cr (CmpU op1 (LoadI op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpU_eReg
//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, eRegI op2) %{
//  match(Set cr (CmpU (LoadI op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Unsigned compare against zero: TEST reg,reg.
instruct testU_reg( eFlagsRegU cr, eRegI src, immI0 zero ) %{
  match(Set cr (CmpU src zero));

  format %{ "TESTu  $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}
12053
// Unsigned pointer compare Instructions

// Compare two pointer registers (pointer compares are unsigned).
instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegReg( op1, op2) );
  ins_pipe( ialu_cr_reg_reg );
%}

// Compare pointer register with pointer immediate.
instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
  match(Set cr (CmpP op1 op2));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x81,0x07);  /* Opcode 81 /7 */
  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// // Cisc-spilled version of cmpP_eReg
instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  ins_cost(500);
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

// // Cisc-spilled version of cmpP_eReg
//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
//  match(Set cr (CmpP (LoadP op1) op2));
//
//  format %{ "CMPu   $op1,$op2" %}
//  ins_cost(500);
//  opcode(0x39);  /* Opcode 39 /r */
//  ins_encode( OpcP, RegMem( op1, op2) );
//%}

// Compare raw pointer (used in out-of-heap check).
// Only works because non-oop pointers must be raw pointers
// and raw pointers have no anti-dependencies.
instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
  predicate( !n->in(2)->in(2)->bottom_type()->isa_oop_ptr() );
  match(Set cr (CmpP op1 (LoadP op2)));

  format %{ "CMPu   $op1,$op2" %}
  opcode(0x3B);  /* Opcode 3B /r */
  ins_encode( OpcP, RegMem( op1, op2) );
  ins_pipe( ialu_cr_reg_mem );
%}

//
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
  match(Set cr (CmpP src zero));

  format %{ "TEST   $src,$src" %}
  opcode(0x85);
  ins_encode( OpcP, RegReg( src, src ) );
  ins_pipe( ialu_cr_reg_imm );
%}

// Cisc-spilled version of testP_reg
// This will generate a signed flags result. This should be ok
// since any compare to a zero should be eq/neq.
// Uses TEST mem,0xFFFFFFFF since TEST has no reg-free memory-only form.
instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
  match(Set cr (CmpP (LoadP op) zero));

  format %{ "TEST   $op,0xFFFFFFFF" %}
  ins_cost(500);
  opcode(0xF7);               /* Opcode F7 /0 */
  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
  ins_pipe( ialu_cr_reg_imm );
%}
12131
12132// Yanked all unsigned pointer compare operations.
12133// Pointer compares are done with CmpP which is already unsigned.
12134
//----------Max and Min--------------------------------------------------------
// Min Instructions
////
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for min
//instruct cmovI_reg_lt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVlt $op2,$op1\t! min" %}
//  opcode(0x4C,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
//// Min Register with Register (P6 version)
//instruct minI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MinI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_lt(op2,op1,cr);
//  %}
//%}

// Min Register with Register (generic version).  Actual code comes
// from the min_enc encoding; the 0xCC opcode appears unused by it —
// presumably a placeholder.  Kills flags (compare-and-branch inside).
instruct minI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MinI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MIN    $dst,$src" %}
  opcode(0xCC);
  ins_encode( min_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12172
// Max Register with Register
//   *** Min and Max using the conditional move are slower than the
//   *** branch version on a Pentium III.
// // Conditional move for max
//instruct cmovI_reg_gt( eRegI op2, eRegI op1, eFlagsReg cr ) %{
//  effect( USE_DEF op2, USE op1, USE cr );
//  format %{ "CMOVgt $op2,$op1\t! max" %}
//  opcode(0x4F,0x0F);
//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
//  ins_pipe( pipe_cmov_reg );
//%}
//
// // Max Register with Register (P6 version)
//instruct maxI_eReg_p6( eRegI op1, eRegI op2 ) %{
//  predicate(VM_Version::supports_cmov() );
//  match(Set op2 (MaxI op1 op2));
//  ins_cost(200);
//  expand %{
//    eFlagsReg cr;
//    compI_eReg(cr,op1,op2);
//    cmovI_reg_gt(op2,op1,cr);
//  %}
//%}

// Max Register with Register (generic version).  Actual code comes
// from the max_enc encoding; the 0xCC opcode appears unused by it —
// presumably a placeholder.  Kills flags (compare-and-branch inside).
instruct maxI_eReg(eRegI dst, eRegI src, eFlagsReg flags) %{
  match(Set dst (MaxI dst src));
  effect(KILL flags);
  ins_cost(300);

  format %{ "MAX    $dst,$src" %}
  opcode(0xCC);
  ins_encode( max_enc(dst,src) );
  ins_pipe( pipe_slow );
%}
12208
12209// ============================================================================
12210// Branch Instructions
12211// Jump Table
12212instruct jumpXtnd(eRegI switch_val) %{
12213  match(Jump switch_val);
12214  ins_cost(350);
12215
12216  format %{  "JMP    [table_base](,$switch_val,1)\n\t" %}
12217
12218  ins_encode %{
12219    address table_base  = __ address_table_constant(_index2label);
12220
12221    // Jump to Address(table_base + switch_reg)
12222    InternalAddress table(table_base);
12223    Address index(noreg, $switch_val$$Register, Address::times_1);
12224    __ jump(ArrayAddress(table, index));
12225  %}
12226  ins_pc_relative(1);
12227  ins_pipe(pipe_jmp);
12228%}
12229
12230// Jump Direct - Label defines a relative address from JMP+1
12231instruct jmpDir(label labl) %{
12232  match(Goto);
12233  effect(USE labl);
12234
12235  ins_cost(300);
12236  format %{ "JMP    $labl" %}
12237  size(5);
12238  opcode(0xE9);
12239  ins_encode( OpcP, Lbl( labl ) );
12240  ins_pipe( pipe_jmp );
12241  ins_pc_relative(1);
12242%}
12243
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Signed conditional branch: 6 bytes = 0x0F, (0x80 | cc), 32-bit displacement.
instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}
12257
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop (signed flags version).
instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop    $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}
12271
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Back-branch of a counted loop, unsigned-comparison flags version.
instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}
12285
// Counted-loop back-branch on "unordered compare" flags (UCF); cheaper cost
// (200) so the matcher prefers it when the UCF flags version applies.
instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl\t# Loop end" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode( Jcc( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
%}
12298
// Jump Direct Conditional - using unsigned comparison
instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12312
// Conditional branch on "unordered compare" flags; lower cost (200) than the
// generic unsigned version so it is preferred when it applies.
instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ "J$cop,u  $labl" %}
  size(6);
  opcode(0x0F, 0x80);
  ins_encode(Jcc(cop, labl));
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12325
// Conditional branch on flags that may have the parity bit set (result of an
// unordered floating-point compare).  For NE the branch must also be taken
// when parity is set, so JP targets the label; for EQ the branch must NOT be
// taken when parity is set, so JP skips over the conditional jump.
// Emits two 6-byte jumps (size 12): JP then Jcc, each 0x0F 0x8x disp32.
instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(200);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u   $labl\n\t"
      $$emit$$"J$cop,u   $labl"
    } else {
      $$emit$$"JP,u   done\n\t"
      $$emit$$"J$cop,u   $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(12);
  opcode(0x0F, 0x80);
  ins_encode %{
    Label* l = $labl$$label;
    // First jump: JP (jump if parity), 0x0F 0x8A disp32.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
       // NE: JP goes to the same label as the Jcc below; the displacement is
       // relative to the end of this jump's 4-byte displacement field.
       parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
       // EQ: JP skips over the 6-byte Jcc that follows.
       parity_disp = 6;
    } else {
       ShouldNotReachHere();
    }
    emit_d32(cbuf, parity_disp);
    // Second jump: Jcc to the label, 0x0F 0x8x disp32.
    $$$emit8$primary;
    emit_cc(cbuf, $secondary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 4)) : 0;
    emit_d32(cbuf, disp);
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
%}
12367
12368// ============================================================================
12369// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12370// array for an instance of the superklass.  Set a hidden internal cache on a
12371// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12372// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12373instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12374  match(Set result (PartialSubtypeCheck sub super));
12375  effect( KILL rcx, KILL cr );
12376
12377  ins_cost(1100);  // slightly larger than the next version
12378  format %{ "CMPL   EAX,ESI\n\t"
12379            "JEQ,s  hit\n\t"
12380            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12381            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
12382            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12383            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12384            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12385            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12386     "hit:\n\t"
12387            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12388     "miss:\t" %}
12389
12390  opcode(0x1); // Force a XOR of EDI
12391  ins_encode( enc_PartialSubtypeCheck() );
12392  ins_pipe( pipe_slow );
12393%}
12394
// Variant used when only the flags result of the subtype check is compared
// against zero: same scan, but no need to materialize the integer result
// ($primary == 0 suppresses the XOR of EDI), so it is slightly cheaper.
instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
  effect( KILL rcx, KILL result );

  ins_cost(1000);
  format %{ "CMPL   EAX,ESI\n\t"
            "JEQ,s  miss\t# Actually a hit; we are done.\n\t"
            "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
            "MOV    ECX,[EDI+arrayKlass::length]\t# length to scan\n\t"
            "ADD    EDI,arrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
     "miss:\t" %}

  opcode(0x0);  // No need to XOR EDI
  ins_encode( enc_PartialSubtypeCheck() );
  ins_pipe( pipe_slow );
%}
12414
12415// ============================================================================
12416// Branch Instructions -- short offset versions
12417//
12418// These instructions are used to replace jumps of a long offset (the default
12419// match) with jumps of a shorter offset.  These instructions are all tagged
12420// with the ins_short_branch attribute, which causes the ADLC to suppress the
12421// match rules in general matching.  Instead, the ADLC generates a conversion
12422// method in the MachNode which can be used to do in-place replacement of the
12423// long variant with the shorter variant.  The compiler will determine if a
12424// branch can be taken by the is_short_branch_offset() predicate in the machine
12425// specific code section of the file.
12426
// Jump Direct - Label defines a relative address from JMP+1
// Short form: 2 bytes = opcode 0xEB + 8-bit displacement.  Tagged
// ins_short_branch so it only replaces jmpDir when the offset fits.
instruct jmpDir_short(label labl) %{
  match(Goto);
  effect(USE labl);

  ins_cost(300);
  format %{ "JMP,s  $labl" %}
  size(2);
  opcode(0xEB);
  ins_encode( OpcP, LblShort( labl ) );
  ins_pipe( pipe_jmp );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12441
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form: 2 bytes = (0x70 | cc) + 8-bit displacement.
instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(If cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s  $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12456
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the counted-loop back-branch (signed flags).
instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
  match(CountedLoopEnd cop cr);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,s  $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12471
// Jump Direct Conditional - Label defines a relative address from Jcc+1
// Short form of the counted-loop back-branch (unsigned flags).
instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12486
// Short form of the counted-loop back-branch on unordered-compare flags.
instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(CountedLoopEnd cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl\t# Loop end" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12500
// Jump Direct Conditional - using unsigned comparison
// Short (2-byte) form of jmpConU.
instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12515
// Short (2-byte) form of jmpConUCF (unordered-compare flags).
instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ "J$cop,us $labl" %}
  size(2);
  opcode(0x70);
  ins_encode( JccShort( cop, labl) );
  ins_pipe( pipe_jcc );
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12529
// Short form of jmpConUCF2: JP + Jcc, each 2 bytes (opcode | cc, then an
// 8-bit displacement), total size 4.  Same parity handling as the long form:
// for NE the JP targets the label; for EQ it skips the 2-byte Jcc.
instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
  match(If cop cmp);
  effect(USE labl);

  ins_cost(300);
  format %{ $$template
    if ($cop$$cmpcode == Assembler::notEqual) {
      $$emit$$"JP,u,s   $labl\n\t"
      $$emit$$"J$cop,u,s   $labl"
    } else {
      $$emit$$"JP,u,s   done\n\t"
      $$emit$$"J$cop,u,s  $labl\n\t"
      $$emit$$"done:"
    }
  %}
  size(4);
  opcode(0x70);
  ins_encode %{
    Label* l = $labl$$label;
    // JP,s — displacement is relative to the end of its 1-byte disp field.
    emit_cc(cbuf, $primary, Assembler::parity);
    int parity_disp = -1;
    if ($cop$$cmpcode == Assembler::notEqual) {
      parity_disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
    } else if ($cop$$cmpcode == Assembler::equal) {
      // Skip over the 2-byte Jcc below.
      parity_disp = 2;
    } else {
      ShouldNotReachHere();
    }
    emit_d8(cbuf, parity_disp);
    // Jcc,s to the label.
    emit_cc(cbuf, $primary, $cop$$cmpcode);
    int disp = l ? (l->loc_pos() - (cbuf.code_size() + 1)) : 0;
    emit_d8(cbuf, disp);
    // Debug-only sanity checks (after emission) that both displacements
    // actually fit in a signed byte.
    assert(-128 <= disp && disp <= 127, "Displacement too large for short jmp");
    assert(-128 <= parity_disp && parity_disp <= 127, "Displacement too large for short jmp");
  %}
  ins_pipe(pipe_jcc);
  ins_pc_relative(1);
  ins_short_branch(1);
%}
12569
12570// ============================================================================
12571// Long Compare
12572//
12573// Currently we hold longs in 2 registers.  Comparing such values efficiently
12574// is tricky.  The flavor of compare used depends on whether we are testing
12575// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12576// The GE test is the negated LT test.  The LE test can be had by commuting
12577// the operands (yielding a GE test) and then negating; negate again for the
12578// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12579// NE test is negated from that.
12580
12581// Due to a shortcoming in the ADLC, it mixes up expressions like:
12582// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12583// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12584// are collapsed internally in the ADLC's dfa-gen code.  The match for
12585// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12586// foo match ends up with the wrong leaf.  One fix is to not match both
12587// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12588// both forms beat the trinary form of long-compare and both are very useful
12589// on Intel which has so few registers.
12590
// Manifest a CmpL result in an integer register.  Very painful.
// This is the test to avoid.
// Three-way compare of two 64-bit values held in register pairs:
// dst = -1, 0 or +1.  High halves compare signed, low halves unsigned (JB).
instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
  match(Set dst (CmpL3 src1 src2));
  effect( KILL flags );
  ins_cost(1000);
  format %{ "XOR    $dst,$dst\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
            "JLT,s  m_one\n\t"
            "JGT,s  p_one\n\t"
            "CMP    $src1.lo,$src2.lo\n\t"
            "JB,s   m_one\n\t"
            "JEQ,s  done\n"
    "p_one:\tINC    $dst\n\t"
            "JMP,s  done\n"
    "m_one:\tDEC    $dst\n"
     "done:" %}
  ins_encode %{
    Label p_one, m_one, done;
    __ xorptr($dst$$Register, $dst$$Register);
    // Signed compare of the high 32 bits first.
    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
    __ jccb(Assembler::less,    m_one);
    __ jccb(Assembler::greater, p_one);
    // High halves equal: unsigned compare of the low 32 bits decides.
    __ cmpl($src1$$Register, $src2$$Register);
    __ jccb(Assembler::below,   m_one);
    __ jccb(Assembler::equal,   done);
    // Falls through here when src1 > src2.
    __ bind(p_one);
    __ incrementl($dst$$Register);
    __ jmpb(done);
    __ bind(m_one);
    __ decrementl($dst$$Register);
    __ bind(done);
  %}
  ins_pipe( pipe_slow );
%}
12626
12627//======
12628// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12629// compares.  Can be used for LE or GT compares by reversing arguments.
12630// NOT GOOD FOR EQ/NE tests.
12631instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12632  match( Set flags (CmpL src zero ));
12633  ins_cost(100);
12634  format %{ "TEST   $src.hi,$src.hi" %}
12635  opcode(0x85);
12636  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12637  ins_pipe( ialu_cr_reg_reg );
12638%}
12639
// Manifest a CmpL result in the normal flags.  Only good for LT or GE
// compares.  Can be used for LE or GT compares by reversing arguments.
// NOT GOOD FOR EQ/NE tests.
// CMP lo,lo then SBB hi,hi (into a temp) produces sign/overflow flags as if
// the full 64-bit subtraction had been done.
instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "MOV    $tmp,$src1.hi\n\t"
            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12653
// Long compares reg < zero/reg OR reg >= zero/reg.
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only LT/GE tests are valid on flags produced by the LTGE compares above.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  expand %{
    jmpCon(cmp,flags,labl);    // JLT or JGE...
  %}
%}
12664
// Compare 2 longs and CMOVE longs.
// Two CMOVcc instructions, one per 32-bit half of the long.
instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
12676
// Compare 2 longs and CMOVE a long loaded from memory (both halves).
instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
12687
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12698
// Compare 2 longs and CMOVE an int loaded from memory.
instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
12708
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12719
// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so the UseSSE<=1 guard applies
// only to the 'lt' disjunct; the 'ge' disjunct matches for any UseSSE value.
// The regD operand class presumably restricts matching — confirm intent.
instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12729
// Compare 2 longs and CMOVE doubles (XMM/SSE2 register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE>=2 guard applies
// only to the 'lt' disjunct — confirm intent (see cmovDD_reg_LTGE).
instruct cmovXDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
12739
// Compare 2 longs and CMOVE floats (x87 register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE==0 guard applies
// only to the 'lt' disjunct — confirm intent (see cmovDD_reg_LTGE).
instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12748
// Compare 2 longs and CMOVE floats (XMM/SSE register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE>=1 guard applies
// only to the 'lt' disjunct — confirm intent (see cmovDD_reg_LTGE).
instruct cmovXX_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
12757
12758//======
12759// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12760instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, eRegI tmp ) %{
12761  match( Set flags (CmpL src zero ));
12762  effect(TEMP tmp);
12763  ins_cost(200);
12764  format %{ "MOV    $tmp,$src.lo\n\t"
12765            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12766  ins_encode( long_cmp_flags0( src, tmp ) );
12767  ins_pipe( ialu_reg_reg_long );
12768%}
12769
// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
// Compare low halves first; only if they are equal compare the high halves.
instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
  match( Set flags (CmpL src1 src2 ));
  ins_cost(200+300);
  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
            "JNE,s  skip\n\t"
            "CMP    $src1.hi,$src2.hi\n\t"
     "skip:\t" %}
  ins_encode( long_cmp_flags1( src1, src2 ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12781
// Long compare reg == zero/reg OR reg != zero/reg
// Just a wrapper for a normal branch, plus the predicate test.
instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only EQ/NE tests are valid on flags produced by the EQNE compares above.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  expand %{
    jmpCon(cmp,flags,labl);    // JEQ or JNE...
  %}
%}
12792
// Compare 2 longs and CMOVE longs.
// Two CMOVcc instructions, one per 32-bit half of the long.
instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
12804
// Compare 2 longs and CMOVE a long loaded from memory (both halves).
instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
12815
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12826
// Compare 2 longs and CMOVE an int loaded from memory.
instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
12836
// Compare 2 longs and CMOVE pointers.
instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12847
// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so the UseSSE<=1 guard applies
// only to the 'eq' disjunct — confirm intent (same pattern as the LTGE forms).
instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12857
// Compare 2 longs and CMOVE doubles (XMM/SSE2 register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE>=2 guard applies
// only to the 'eq' disjunct — confirm intent.
instruct cmovXDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
12867
// Compare 2 longs and CMOVE floats (x87 register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE==0 guard applies
// only to the 'eq' disjunct — confirm intent.
instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
12876
// Compare 2 longs and CMOVE floats (XMM/SSE register version)
// NOTE(review): '&&' binds tighter than '||', so the UseSSE>=1 guard applies
// only to the 'eq' disjunct — confirm intent.
instruct cmovXX_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
12885
12886//======
12887// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12888// Same as cmpL_reg_flags_LEGT except must negate src
12889instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, eRegI tmp ) %{
12890  match( Set flags (CmpL src zero ));
12891  effect( TEMP tmp );
12892  ins_cost(300);
12893  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12894            "CMP    $tmp,$src.lo\n\t"
12895            "SBB    $tmp,$src.hi\n\t" %}
12896  ins_encode( long_cmp_flags3(src, tmp) );
12897  ins_pipe( ialu_reg_reg_long );
12898%}
12899
// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
// requires a commuted test to get the same result.
instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, eRegI tmp ) %{
  match( Set flags (CmpL src1 src2 ));
  effect( TEMP tmp );
  ins_cost(300);
  // Note the src2/src1 operand order below (swapped relative to the match).
  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
            "MOV    $tmp,$src2.hi\n\t"
            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
  ins_pipe( ialu_cr_reg_reg );
%}
12913
// Long compares reg <= zero/reg OR reg > zero/reg.
// Just a wrapper for a normal branch, plus the predicate test
instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
  match(If cmp flags);
  effect(USE labl);
  // Only GT/LE tests are valid on flags produced by the LEGT (swapped-
  // operand) compares above; cmpOp_commute emits the commuted condition.
  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
  ins_cost(300);
  expand %{
    jmpCon(cmp,flags,labl);    // JGT or JLE...
  %}
%}
12925
// Compare 2 longs and CMOVE longs.
// Two CMOVcc instructions, one per 32-bit half of the long.
instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(400);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
  ins_pipe( pipe_cmov_reg_long );
%}
12937
// Compare 2 longs and CMOVE a long loaded from memory (both halves).
instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  ins_cost(500);
  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
            "CMOV$cmp $dst.hi,$src.hi+4" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
  ins_pipe( pipe_cmov_reg_long );
%}
12948
// Compare 2 longs and CMOVE ints.
instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, eRegI src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12959
// Compare 2 longs and CMOVE an int loaded from memory.
instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegI dst, memory src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
  ins_cost(250);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
  ins_pipe( pipe_cmov_mem );
%}
12969
// Compare 2 longs and CMOVE ptrs.
instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  format %{ "CMOV$cmp $dst,$src" %}
  opcode(0x0F,0x40);
  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
  ins_pipe( pipe_cmov_reg );
%}
12980
// Compare 2 longs and CMOVE doubles
// NOTE(review): '&&' binds tighter than '||', so the UseSSE<=1 guard applies
// only to the 'le' disjunct — confirm intent (same pattern as the LTGE forms).
instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovD_regS(cmp,flags,dst,src);
  %}
%}
12990
12991// Compare 2 longs and CMOVE doubles
// SSE2 double variant (UseSSE>=2): expands to fcmovXD_regS.
// FIX: parenthesize the (le || gt) disjunction.  '&&' binds tighter than
// '||', so the original predicate parsed as
//     (UseSSE>=2 && test==le) || test==gt
// letting the BoolTest::gt case match even without SSE2 -- inconsistent
// with the parenthesized cmovII/cmovPP _LEGT predicates above, and
// overlapping the x87 rule cmovDD_reg_LEGT.
instruct cmovXDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regXD dst, regXD src) %{
  predicate( UseSSE>=2 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovXD_regS(cmp,flags,dst,src);
  %}
%}
13000
// Compare 2 longs and CMOVE floats, x87 variant (UseSSE==0): expands to
// fcmovF_regS.
// FIX: parenthesize the (le || gt) disjunction.  '&&' binds tighter than
// '||', so the original predicate parsed as
//     (UseSSE==0 && test==le) || test==gt
// letting the BoolTest::gt case match under any UseSSE setting --
// inconsistent with the parenthesized _LEGT predicates above, and
// overlapping the SSE rule cmovXX_reg_LEGT.
instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
  predicate( UseSSE==0 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovF_regS(cmp,flags,dst,src);
  %}
%}
13009
13010
// Compare 2 longs and CMOVE floats, SSE variant (UseSSE>=1): expands to
// fcmovX_regS.
// FIX: parenthesize the (le || gt) disjunction.  '&&' binds tighter than
// '||', so the original predicate parsed as
//     (UseSSE>=1 && test==le) || test==gt
// letting the BoolTest::gt case match even with UseSSE==0 -- inconsistent
// with the parenthesized _LEGT predicates above, and overlapping the x87
// rule cmovFF_reg_LEGT.
instruct cmovXX_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regX dst, regX src) %{
  predicate( UseSSE>=1 && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ) );
  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
  ins_cost(200);
  expand %{
    fcmovX_regS(cmp,flags,dst,src);
  %}
%}
13019
13020
13021// ============================================================================
13022// Procedure Call/Return Instructions
13023// Call Java Static Instruction
13024// Note: If this code changes, the corresponding ret_addr_offset() and
13025//       compute_padding() functions will have to be adjusted.
instruct CallStaticJavaDirect(method meth) %{
  match(CallStaticJava);
  effect(USE meth);     // keep the method constant live across the call

  ins_cost(300);
  format %{ "CALL,static " %}
  opcode(0xE8); /* E8 cd */  // CALL rel32, pc-relative displacement
  // pre/post_call_FPU presumably put the x87 stack into the expected
  // state around the call (confirm against the encoding definitions);
  // call_epilog handles post-call bookkeeping.
  ins_encode( pre_call_FPU,
              Java_Static_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // displacement is pc-relative (see E8 cd above)
  ins_alignment(4);     // see the ret_addr_offset()/compute_padding() note above
%}
13041
13042// Call Java Dynamic Instruction
13043// Note: If this code changes, the corresponding ret_addr_offset() and
13044//       compute_padding() functions will have to be adjusted.
instruct CallDynamicJavaDirect(method meth) %{
  match(CallDynamicJava);
  effect(USE meth);     // keep the method constant live across the call

  ins_cost(300);
  // Per the format, EAX is pre-loaded with a placeholder oop (-1) before
  // the call -- presumably for inline-cache resolution; confirm against
  // Java_Dynamic_Call.
  format %{ "MOV    EAX,(oop)-1\n\t"
            "CALL,dynamic" %}
  opcode(0xE8); /* E8 cd */  // CALL rel32, pc-relative displacement
  ins_encode( pre_call_FPU,
              Java_Dynamic_Call( meth ),
              call_epilog,
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);   // displacement is pc-relative
  ins_alignment(4);     // see the ret_addr_offset()/compute_padding() note above
%}
13061
13062// Call Runtime Instruction
instruct CallRuntimeDirect(method meth) %{
  match(CallRuntime );
  effect(USE meth);     // keep the method constant live across the call

  ins_cost(300);
  format %{ "CALL,runtime " %}
  opcode(0xE8); /* E8 cd */  // CALL rel32, pc-relative displacement
  // Use FFREEs to clear entries in float stack
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13078
13079// Call runtime without safepoint
// Leaf runtime call: no safepoint.  Differs from CallRuntimeDirect only
// in the extra Verify_FPU_For_Leaf encoding step.
instruct CallLeafDirect(method meth) %{
  match(CallLeaf);
  effect(USE meth);     // keep the method constant live across the call

  ins_cost(300);
  format %{ "CALL_LEAF,runtime " %}
  opcode(0xE8); /* E8 cd */  // CALL rel32, pc-relative displacement
  ins_encode( pre_call_FPU,
              FFree_Float_Stack_All,
              Java_To_Runtime( meth ),
              Verify_FPU_For_Leaf, post_call_FPU );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13094
// Leaf runtime call that does not touch the FPU: no pre/post_call_FPU
// bracketing and no float-stack cleanup, just the bare call.
instruct CallLeafNoFPDirect(method meth) %{
  match(CallLeafNoFP);
  effect(USE meth);     // keep the method constant live across the call

  ins_cost(300);
  format %{ "CALL_LEAF_NOFP,runtime " %}
  opcode(0xE8); /* E8 cd */  // CALL rel32, pc-relative displacement
  ins_encode(Java_To_Runtime(meth));
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13106
13107
13108// Return Instruction
13109// Remove the return address & jump to it.
instruct Ret() %{
  match(Return);
  format %{ "RET" %}
  opcode(0xC3);         // C3: near return
  ins_encode(OpcP);     // single opcode byte, no operands
  ins_pipe( pipe_jmp );
%}
13117
13118// Tail Call; Jump from runtime stub to Java code.
13119// Also known as an 'interprocedural jump'.
13120// Target of jump will eventually return to caller.
13121// TailJump below removes the return address.
// Indirect jump through jump_target; the callee finds its method oop in
// EBX (forced by the eBXRegP operand class on method_oop).
instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
  match(TailCall jump_target method_oop );
  ins_cost(300);
  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */  // JMP r/m32, indirect
  ins_encode( OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13130
13131
13132// Tail Jump; remove the return address; jump to target.
13133// TailCall above leaves the return address around.
// Pop the (dead) return address into EDX, then jump indirect through
// jump_target.  The exception oop travels in EAX (eAXRegP operand class).
instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
  match( TailJump jump_target ex_oop );
  ins_cost(300);
  format %{ "POP    EDX\t# pop return address into dummy\n\t"
            "JMP    $jump_target " %}
  opcode(0xFF, 0x4);  /* Opcode FF /4 */  // JMP r/m32, indirect
  ins_encode( enc_pop_rdx,
              OpcP, RegOpc(jump_target) );
  ins_pipe( pipe_jmp );
%}
13144
13145// Create exception oop: created by stack-crawling runtime code.
13146// Created exception is now available to this handler, and is setup
13147// just prior to jumping to this handler.  No code emitted.
instruct CreateException( eAXRegP ex_oop )
%{
  match(Set ex_oop (CreateEx));

  size(0);              // zero bytes: the oop is already in EAX
  // use the following format syntax
  format %{ "# exception oop is in EAX; no code emitted" %}
  ins_encode();         // intentionally empty -- no code emitted
  ins_pipe( empty );
%}
13158
13159
13160// Rethrow exception:
13161// The exception oop will come in the first argument position.
13162// Then JUMP (not call) to the rethrow stub code.
instruct RethrowException()
%{
  match(Rethrow);

  // use the following format syntax
  format %{ "JMP    rethrow_stub" %}
  ins_encode(enc_rethrow);  // jump (not call) to the shared rethrow stub
  ins_pipe( pipe_jmp );
%}
13172
13173// inlined locking and unlocking
13174
13175
// Inline fast-path lock.  The outcome is reported in the flags register
// (cr); tmp is pinned to EAX by its operand class and scr is a scratch
// pointer register -- both are clobbered (TEMP).
instruct cmpFastLock( eFlagsReg cr, eRegP object, eRegP box, eAXRegI tmp, eRegP scr) %{
  match( Set cr (FastLock object box) );
  effect( TEMP tmp, TEMP scr );
  ins_cost(300);
  format %{ "FASTLOCK $object, $box KILLS $tmp,$scr" %}
  ins_encode( Fast_Lock(object,box,tmp,scr) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13185
// Inline fast-path unlock.  Outcome in flags (cr); box is pinned to EAX
// by its operand class; tmp is a clobbered scratch register.
instruct cmpFastUnlock( eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
  match( Set cr (FastUnlock object box) );
  effect( TEMP tmp );
  ins_cost(300);
  format %{ "FASTUNLOCK $object, $box, $tmp" %}
  ins_encode( Fast_Unlock(object,box,tmp) );
  ins_pipe( pipe_slow );
  ins_pc_relative(1);
%}
13195
13196
13197
13198// ============================================================================
13199// Safepoint Instruction
// Safepoint poll: per the format, a 6-byte TEST against the polling page
// address ("poll for GC").  The flags register is killed by the TEST.
instruct safePoint_poll(eFlagsReg cr) %{
  match(SafePoint);
  effect(KILL cr);

  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
  // On SPARC that might be acceptable as we can generate the address with
  // just a sethi, saving an or.  By polling at offset 0 we can end up
  // putting additional pressure on the index-0 in the D$.  Because of
  // alignment (just like the situation at hand) the lower indices tend
  // to see more traffic.  It'd be better to change the polling address
  // to offset 0 of the last $line in the polling page.

  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
  ins_cost(125);
  size(6) ;             // fixed 6-byte encoding
  ins_encode( Safepoint_Poll() );
  ins_pipe( ialu_reg_mem );
%}
13218
13219//----------PEEPHOLE RULES-----------------------------------------------------
13220// These must follow all instruction definitions as they use the names
13221// defined in the instructions definitions.
13222//
13223// peepmatch ( root_instr_name [preceding_instruction]* );
13224//
13225// peepconstraint %{
13226// (instruction_number.operand_name relational_op instruction_number.operand_name
13227//  [, ...] );
13228// // instruction numbers are zero-based using left to right order in peepmatch
13229//
13230// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13231// // provide an instruction_number.operand_name for each operand that appears
13232// // in the replacement instruction's match rule
13233//
13234// ---------VM FLAGS---------------------------------------------------------
13235//
13236// All peephole optimizations can be turned off using -XX:-OptoPeephole
13237//
13238// Each peephole rule is given an identifying number starting with zero and
13239// increasing by one in the order seen by the parser.  An individual peephole
13240// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13241// on the command-line.
13242//
13243// ---------CURRENT LIMITATIONS----------------------------------------------
13244//
13245// Only match adjacent instructions in same basic block
13246// Only equality constraints
13247// Only constraints between operands, not (0.dest_reg == EAX_enc)
13248// Only one replacement instruction
13249//
13250// ---------EXAMPLE----------------------------------------------------------
13251//
13252// // pertinent parts of existing instructions in architecture description
13253// instruct movI(eRegI dst, eRegI src) %{
13254//   match(Set dst (CopyI src));
13255// %}
13256//
13257// instruct incI_eReg(eRegI dst, immI1 src, eFlagsReg cr) %{
13258//   match(Set dst (AddI dst src));
13259//   effect(KILL cr);
13260// %}
13261//
13262// // Change (inc mov) to lea
13263// peephole %{
//   // increment preceded by register-register move
13265//   peepmatch ( incI_eReg movI );
13266//   // require that the destination register of the increment
13267//   // match the destination register of the move
13268//   peepconstraint ( 0.dst == 1.dst );
13269//   // construct a replacement instruction that sets
13270//   // the destination to ( move's source register + one )
13271//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13272// %}
13273//
13274// Implementation no longer uses movX instructions since
13275// machine-independent system no longer uses CopyX nodes.
13276//
13277// peephole %{
13278//   peepmatch ( incI_eReg movI );
13279//   peepconstraint ( 0.dst == 1.dst );
13280//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13281// %}
13282//
13283// peephole %{
13284//   peepmatch ( decI_eReg movI );
13285//   peepconstraint ( 0.dst == 1.dst );
13286//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13287// %}
13288//
13289// peephole %{
13290//   peepmatch ( addI_eReg_imm movI );
13291//   peepconstraint ( 0.dst == 1.dst );
13292//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13293// %}
13294//
13295// peephole %{
13296//   peepmatch ( addP_eReg_imm movP );
13297//   peepconstraint ( 0.dst == 1.dst );
13298//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13299// %}
13300
13301// // Change load of spilled value to only a spill
13302// instruct storeI(memory mem, eRegI src) %{
13303//   match(Set mem (StoreI mem src));
13304// %}
13305//
13306// instruct loadI(eRegI dst, memory mem) %{
13307//   match(Set dst (LoadI mem));
13308// %}
13309//
// Peephole: a loadI (instr 0) immediately preceded by a storeI (instr 1)
// to the same memory from the same register re-reads the value just
// stored; replace the pair with the store alone.
peephole %{
  peepmatch ( loadI storeI );
  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
%}
13315
13316//----------SMARTSPILL RULES---------------------------------------------------
13317// These must follow all instruction definitions as they use the names
13318// defined in the instructions definitions.
13319