x86_32.ad revision 9737:e286c9ccd58d
1//
2// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// archtecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// Previously set EBX, ESI, and EDI as save-on-entry for java code
64// Turn off SOE in java-code due to frequent use of uncommon-traps.
65// Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
77// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
78// allocator, and only shows up in the encodings.
79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81// Ok so here's the trick FPR1 is really st(0) except in the midst
82// of emission of assembly for a machnode. During the emission the fpu stack
83// is pushed making FPR1 == st(1) temporarily. However at any safepoint
84// the stack will not have this element so FPR1 == st(0) from the
85// oopMap viewpoint. This same weirdness with numbering causes
86// instruction encoding to have to play games with the register
87// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88// where it does flt->flt moves to see an example
89//
90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104//
105// Empty fill registers, which are never used, but supply alignment to xmm regs
106//
107reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
108reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
109reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
110reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
111reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
112reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
113reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
114reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
115
116// Specify priority of register selection within phases of register
117// allocation.  Highest priority is first.  A useful heuristic is to
118// give registers a low priority when they are required by machine
119// instructions, like EAX and EDX.  Registers which are used as
120// pairs must fall on an even boundary (witness the FPR#L's in this list).
121// For the Intel integer registers, the equivalent Long pairs are
122// EDX:EAX, EBX:ECX, and EDI:EBP.
123alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126                    FPR6L, FPR6H, FPR7L, FPR7H,
127                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130//----------Architecture Description Register Classes--------------------------
131// Several register classes are automatically defined based upon information in
132// this architecture description.
133// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
134// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
135// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
136// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
137//
138// Class for no registers (empty set).
139reg_class no_reg();
140
141// Class for all registers
142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
143// Class for all registers (excluding EBP)
144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
145// Dynamic register class that selects at runtime between register classes
146// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
147// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
149
150// Class for general registers
151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
152// Class for general registers (excluding EBP).
153// This register class can be used for implicit null checks on win95.
154// It is also safe for use by tailjumps (we don't want to allocate in ebp).
155// Used also if the PreserveFramePointer flag is true.
156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
157// Dynamic register class that selects between int_reg and int_reg_no_ebp.
158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
159
160// Class of "X" registers
161reg_class int_x_reg(EBX, ECX, EDX, EAX);
162
163// Class of registers that can appear in an address with no offset.
164// EBP and ESP require an extra instruction byte for zero offset.
165// Used in fast-unlock
166reg_class p_reg(EDX, EDI, ESI, EBX);
167
168// Class for general registers excluding ECX
169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
170// Class for general registers excluding ECX (and EBP)
171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
174
175// Class for general registers excluding EAX
176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
177
178// Class for general registers excluding EAX and EBX.
179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
180// Class for general registers excluding EAX and EBX (and EBP)
181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
184
185// Class of EAX (for multiply and divide operations)
186reg_class eax_reg(EAX);
187
188// Class of EBX (for atomic add)
189reg_class ebx_reg(EBX);
190
191// Class of ECX (for shift and JCXZ operations and cmpLTMask)
192reg_class ecx_reg(ECX);
193
194// Class of EDX (for multiply and divide operations)
195reg_class edx_reg(EDX);
196
197// Class of EDI (for synchronization)
198reg_class edi_reg(EDI);
199
200// Class of ESI (for synchronization)
201reg_class esi_reg(ESI);
202
203// Singleton class for stack pointer
204reg_class sp_reg(ESP);
205
206// Singleton class for instruction pointer
207// reg_class ip_reg(EIP);
208
209// Class of integer register pairs
210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
211// Class of integer register pairs (excluding EBP and EDI);
212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
213// Dynamic register class that selects between long_reg and long_reg_no_ebp.
214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
215
216// Class of integer register pairs that aligns with calling convention
217reg_class eadx_reg( EAX,EDX );
218reg_class ebcx_reg( ECX,EBX );
219
220// Not AX or DX, used in divides
221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
222// Not AX or DX (and neither EBP), used in divides
223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
226
227// Floating point registers.  Notice FPR0 is not a choice.
228// FPR0 is not ever allocated; we use clever encodings to fake
229// a 2-address instructions out of Intels FP stack.
230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
231
232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
233                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234                      FPR7L,FPR7H );
235
236reg_class fp_flt_reg0( FPR1L );
237reg_class fp_dbl_reg0( FPR1L,FPR1H );
238reg_class fp_dbl_reg1( FPR2L,FPR2H );
239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
240                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
241
242%}
243
244
245//----------SOURCE BLOCK-------------------------------------------------------
246// This is a block of C++ code which provides values, functions, and
247// definitions necessary in the rest of the architecture description
248source_hpp %{
249// Must be visible to the DFA in dfa_x86_32.cpp
250extern bool is_operand_hi32_zero(Node* n);
251%}
252
253source %{
254#define   RELOC_IMM32    Assembler::imm_operand
255#define   RELOC_DISP32   Assembler::disp32_operand
256
257#define __ _masm.
258
259// How to find the high register of a Long pair, given the low register
260#define   HIGH_FROM_LOW(x) ((x)+2)
261
262// These masks are used to provide 128-bit aligned bitmasks to the XMM
263// instructions, to allow sign-masking or sign-bit flipping.  They allow
264// fast versions of NegF/NegD and AbsF/AbsD.
265
266// Note: 'double' and 'long long' have 32-bits alignment on x86.
267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
268  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
269  // of 128-bits operands for SSE instructions.
270  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
271  // Store the value to a 128-bits operand.
272  operand[0] = lo;
273  operand[1] = hi;
274  return operand;
275}
276
277// Buffer for 128-bits masks used by SSE instructions.
278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
279
280// Static initialization during VM startup.
281static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
283static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
285
286// Offset hacking within calls.
287static int pre_call_resets_size() {
288  int size = 0;
289  Compile* C = Compile::current();
290  if (C->in_24_bit_fp_mode()) {
291    size += 6; // fldcw
292  }
293  if (C->max_vector_size() > 16) {
294    size += 3; // vzeroupper
295  }
296  return size;
297}
298
299// !!!!! Special hack to get all type of calls to specify the byte offset
300//       from the start of the call to the point where the return address
301//       will point.
302int MachCallStaticJavaNode::ret_addr_offset() {
303  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
304}
305
306int MachCallDynamicJavaNode::ret_addr_offset() {
307  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
308}
309
310static int sizeof_FFree_Float_Stack_All = -1;
311
312int MachCallRuntimeNode::ret_addr_offset() {
313  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
314  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
315}
316
317// Indicate if the safepoint node needs the polling page as an input.
318// Since x86 does have absolute addressing, it doesn't.
319bool SafePointNode::needs_polling_address_input() {
320  return false;
321}
322
323//
324// Compute padding required for nodes which need alignment
325//
326
327// The address of the call instruction needs to be 4-byte aligned to
328// ensure that it does not span a cache line so that it can be patched.
329int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
330  current_offset += pre_call_resets_size();  // skip fldcw, if any
331  current_offset += 1;      // skip call opcode byte
332  return round_to(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_resets_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return round_to(current_offset, alignment_required()) - current_offset;
342}
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
414
415   // rRegI ereg, memory mem) %{    // emit_reg_mem
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && (disp_reloc == relocInfo::none) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( disp_reloc != relocInfo::none ) {
435            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( disp_reloc != relocInfo::none ) {
443            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && (disp_reloc == relocInfo::none) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( disp_reloc != relocInfo::none ) {
473          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
532  ShouldNotReachHere();
533}
534
535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
536  // Empty encoding
537}
538
539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
540  return 0;
541}
542
543#ifndef PRODUCT
544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
545  st->print("# MachConstantBaseNode (empty encoding)");
546}
547#endif
548
549
550//=============================================================================
551#ifndef PRODUCT
552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553  Compile* C = ra_->C;
554
555  int framesize = C->frame_size_in_bytes();
556  int bangsize = C->bang_size_in_bytes();
557  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
558  // Remove wordSize for return addr which is already pushed.
559  framesize -= wordSize;
560
561  if (C->need_stack_bang(bangsize)) {
562    framesize -= wordSize;
563    st->print("# stack bang (%d bytes)", bangsize);
564    st->print("\n\t");
565    st->print("PUSH   EBP\t# Save EBP");
566    if (PreserveFramePointer) {
567      st->print("\n\t");
568      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
569    }
570    if (framesize) {
571      st->print("\n\t");
572      st->print("SUB    ESP, #%d\t# Create frame",framesize);
573    }
574  } else {
575    st->print("SUB    ESP, #%d\t# Create frame",framesize);
576    st->print("\n\t");
577    framesize -= wordSize;
578    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
579    if (PreserveFramePointer) {
580      st->print("\n\t");
581      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
582      if (framesize > 0) {
583        st->print("\n\t");
584        st->print("ADD    EBP, #%d", framesize);
585      }
586    }
587  }
588
589  if (VerifyStackAtCalls) {
590    st->print("\n\t");
591    framesize -= wordSize;
592    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
593  }
594
595  if( C->in_24_bit_fp_mode() ) {
596    st->print("\n\t");
597    st->print("FLDCW  \t# load 24 bit fpu control word");
598  }
599  if (UseSSE >= 2 && VerifyFPU) {
600    st->print("\n\t");
601    st->print("# verify FPU stack (must be clean on entry)");
602  }
603
604#ifdef ASSERT
605  if (VerifyStackAtCalls) {
606    st->print("\n\t");
607    st->print("# stack alignment check");
608  }
609#endif
610  st->cr();
611}
612#endif
613
614
615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
616  Compile* C = ra_->C;
617  MacroAssembler _masm(&cbuf);
618
619  int framesize = C->frame_size_in_bytes();
620  int bangsize = C->bang_size_in_bytes();
621
622  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
623
624  C->set_frame_complete(cbuf.insts_size());
625
626  if (C->has_mach_constant_base_node()) {
627    // NOTE: We set the table base offset here because users might be
628    // emitted before MachConstantBaseNode.
629    Compile::ConstantTable& constant_table = C->constant_table();
630    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
631  }
632}
633
634uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
635  return MachNode::size(ra_); // too many variables; just compute it the hard way
636}
637
638int MachPrologNode::reloc() const {
639  return 0; // a large enough number
640}
641
642//=============================================================================
643#ifndef PRODUCT
644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
645  Compile *C = ra_->C;
646  int framesize = C->frame_size_in_bytes();
647  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
648  // Remove two words for return addr and rbp,
649  framesize -= 2*wordSize;
650
651  if (C->max_vector_size() > 16) {
652    st->print("VZEROUPPER");
653    st->cr(); st->print("\t");
654  }
655  if (C->in_24_bit_fp_mode()) {
656    st->print("FLDCW  standard control word");
657    st->cr(); st->print("\t");
658  }
659  if (framesize) {
660    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
661    st->cr(); st->print("\t");
662  }
663  st->print_cr("POPL   EBP"); st->print("\t");
664  if (do_polling() && C->is_method_compilation()) {
665    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
666    st->cr(); st->print("\t");
667  }
668}
669#endif
670
671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
672  Compile *C = ra_->C;
673  MacroAssembler _masm(&cbuf);
674
675  if (C->max_vector_size() > 16) {
676    // Clear upper bits of YMM registers when current compiled code uses
677    // wide vectors to avoid AVX <-> SSE transition penalty during call.
678    _masm.vzeroupper();
679  }
680  // If method set FPU control word, restore to standard control word
681  if (C->in_24_bit_fp_mode()) {
682    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
683  }
684
685  int framesize = C->frame_size_in_bytes();
686  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
687  // Remove two words for return addr and rbp,
688  framesize -= 2*wordSize;
689
690  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
691
692  if (framesize >= 128) {
693    emit_opcode(cbuf, 0x81); // add  SP, #framesize
694    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695    emit_d32(cbuf, framesize);
696  } else if (framesize) {
697    emit_opcode(cbuf, 0x83); // add  SP, #framesize
698    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
699    emit_d8(cbuf, framesize);
700  }
701
702  emit_opcode(cbuf, 0x58 | EBP_enc);
703
704  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
705    __ reserved_stack_check();
706  }
707
708  if (do_polling() && C->is_method_compilation()) {
709    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
710    emit_opcode(cbuf,0x85);
711    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
712    emit_d32(cbuf, (intptr_t)os::get_polling_page());
713  }
714}
715
716uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
717  Compile *C = ra_->C;
718  // If method set FPU control word, restore to standard control word
719  int size = C->in_24_bit_fp_mode() ? 6 : 0;
720  if (C->max_vector_size() > 16) size += 3; // vzeroupper
721  if (do_polling() && C->is_method_compilation()) size += 6;
722
723  int framesize = C->frame_size_in_bytes();
724  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
725  // Remove two words for return addr and rbp,
726  framesize -= 2*wordSize;
727
728  size++; // popl rbp,
729
730  if (framesize >= 128) {
731    size += 6;
732  } else {
733    size += framesize ? 3 : 0;
734  }
735  size += 64; // added to support ReservedStackAccess
736  return size;
737}
738
739int MachEpilogNode::reloc() const {
740  return 0; // a large enough number
741}
742
743const Pipeline * MachEpilogNode::pipeline() const {
744  return MachNode::pipeline_class();
745}
746
747int MachEpilogNode::safepoint_offset() const { return 0; }
748
749//=============================================================================
750
751enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
752static enum RC rc_class( OptoReg::Name reg ) {
753
754  if( !OptoReg::is_valid(reg)  ) return rc_bad;
755  if (OptoReg::is_stack(reg)) return rc_stack;
756
757  VMReg r = OptoReg::as_VMReg(reg);
758  if (r->is_Register()) return rc_int;
759  if (r->is_FloatRegister()) {
760    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
761    return rc_float;
762  }
763  assert(r->is_XMMRegister(), "must be");
764  return rc_xmm;
765}
766
767static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
768                        int opcode, const char *op_str, int size, outputStream* st ) {
769  if( cbuf ) {
770    emit_opcode  (*cbuf, opcode );
771    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
772#ifndef PRODUCT
773  } else if( !do_size ) {
774    if( size != 0 ) st->print("\n\t");
775    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
776      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
777      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
778    } else { // FLD, FST, PUSH, POP
779      st->print("%s [ESP + #%d]",op_str,offset);
780    }
781#endif
782  }
783  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
784  return size+3+offset_size;
785}
786
787// Helper for XMM registers.  Extra opcode bits, limited syntax.
788static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
789                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
790  int in_size_in_bits = Assembler::EVEX_32bit;
791  int evex_encoding = 0;
792  if (reg_lo+1 == reg_hi) {
793    in_size_in_bits = Assembler::EVEX_64bit;
794    evex_encoding = Assembler::VEX_W;
795  }
796  if (cbuf) {
797    MacroAssembler _masm(cbuf);
798    if (reg_lo+1 == reg_hi) { // double move?
799      if (is_load) {
800        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
801      } else {
802        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
803      }
804    } else {
805      if (is_load) {
806        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
807      } else {
808        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
809      }
810    }
811#ifndef PRODUCT
812  } else if (!do_size) {
813    if (size != 0) st->print("\n\t");
814    if (reg_lo+1 == reg_hi) { // double move?
815      if (is_load) st->print("%s %s,[ESP + #%d]",
816                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
817                              Matcher::regName[reg_lo], offset);
818      else         st->print("MOVSD  [ESP + #%d],%s",
819                              offset, Matcher::regName[reg_lo]);
820    } else {
821      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
822                              Matcher::regName[reg_lo], offset);
823      else         st->print("MOVSS  [ESP + #%d],%s",
824                              offset, Matcher::regName[reg_lo]);
825    }
826#endif
827  }
828  bool is_single_byte = false;
829  if ((UseAVX > 2) && (offset != 0)) {
830    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
831  }
832  int offset_size = 0;
833  if (UseAVX > 2 ) {
834    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
835  } else {
836    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
837  }
838  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
839  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
840  return size+5+offset_size;
841}
842
843
844static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
845                            int src_hi, int dst_hi, int size, outputStream* st ) {
846  if (cbuf) {
847    MacroAssembler _masm(cbuf);
848    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
849      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
850                as_XMMRegister(Matcher::_regEncode[src_lo]));
851    } else {
852      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
853                as_XMMRegister(Matcher::_regEncode[src_lo]));
854    }
855#ifndef PRODUCT
856  } else if (!do_size) {
857    if (size != 0) st->print("\n\t");
858    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
859      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
860        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
861      } else {
862        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
863      }
864    } else {
865      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
866        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
867      } else {
868        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
869      }
870    }
871#endif
872  }
873  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
874  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
875  int sz = (UseAVX > 2) ? 6 : 4;
876  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
877      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
878  return size + sz;
879}
880
881static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
882                            int src_hi, int dst_hi, int size, outputStream* st ) {
883  // 32-bit
884  if (cbuf) {
885    MacroAssembler _masm(cbuf);
886    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
887             as_Register(Matcher::_regEncode[src_lo]));
888#ifndef PRODUCT
889  } else if (!do_size) {
890    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
891#endif
892  }
893  return (UseAVX> 2) ? 6 : 4;
894}
895
896
897static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
898                                 int src_hi, int dst_hi, int size, outputStream* st ) {
899  // 32-bit
900  if (cbuf) {
901    MacroAssembler _masm(cbuf);
902    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
903             as_XMMRegister(Matcher::_regEncode[src_lo]));
904#ifndef PRODUCT
905  } else if (!do_size) {
906    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
907#endif
908  }
909  return (UseAVX> 2) ? 6 : 4;
910}
911
912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
913  if( cbuf ) {
914    emit_opcode(*cbuf, 0x8B );
915    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
916#ifndef PRODUCT
917  } else if( !do_size ) {
918    if( size != 0 ) st->print("\n\t");
919    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
920#endif
921  }
922  return size+2;
923}
924
925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
926                                 int offset, int size, outputStream* st ) {
927  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
928    if( cbuf ) {
929      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
930      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
931#ifndef PRODUCT
932    } else if( !do_size ) {
933      if( size != 0 ) st->print("\n\t");
934      st->print("FLD    %s",Matcher::regName[src_lo]);
935#endif
936    }
937    size += 2;
938  }
939
940  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
941  const char *op_str;
942  int op;
943  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
944    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
945    op = 0xDD;
946  } else {                   // 32-bit store
947    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
948    op = 0xD9;
949    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
950  }
951
952  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
953}
954
955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
957                          int src_hi, int dst_hi, uint ireg, outputStream* st);
958
959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
960                            int stack_offset, int reg, uint ireg, outputStream* st);
961
962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
963                                     int dst_offset, uint ireg, outputStream* st) {
964  int calc_size = 0;
965  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
966  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
967  switch (ireg) {
968  case Op_VecS:
969    calc_size = 3+src_offset_size + 3+dst_offset_size;
970    break;
971  case Op_VecD:
972    calc_size = 3+src_offset_size + 3+dst_offset_size;
973    src_offset += 4;
974    dst_offset += 4;
975    src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
976    dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
977    calc_size += 3+src_offset_size + 3+dst_offset_size;
978    break;
979  case Op_VecX:
980  case Op_VecY:
981  case Op_VecZ:
982    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
983    break;
984  default:
985    ShouldNotReachHere();
986  }
987  if (cbuf) {
988    MacroAssembler _masm(cbuf);
989    int offset = __ offset();
990    switch (ireg) {
991    case Op_VecS:
992      __ pushl(Address(rsp, src_offset));
993      __ popl (Address(rsp, dst_offset));
994      break;
995    case Op_VecD:
996      __ pushl(Address(rsp, src_offset));
997      __ popl (Address(rsp, dst_offset));
998      __ pushl(Address(rsp, src_offset+4));
999      __ popl (Address(rsp, dst_offset+4));
1000      break;
1001    case Op_VecX:
1002      __ movdqu(Address(rsp, -16), xmm0);
1003      __ movdqu(xmm0, Address(rsp, src_offset));
1004      __ movdqu(Address(rsp, dst_offset), xmm0);
1005      __ movdqu(xmm0, Address(rsp, -16));
1006      break;
1007    case Op_VecY:
1008      __ vmovdqu(Address(rsp, -32), xmm0);
1009      __ vmovdqu(xmm0, Address(rsp, src_offset));
1010      __ vmovdqu(Address(rsp, dst_offset), xmm0);
1011      __ vmovdqu(xmm0, Address(rsp, -32));
1012    case Op_VecZ:
1013      __ evmovdqul(Address(rsp, -64), xmm0, 2);
1014      __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1015      __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1016      __ evmovdqul(xmm0, Address(rsp, -64), 2);
1017      break;
1018    default:
1019      ShouldNotReachHere();
1020    }
1021    int size = __ offset() - offset;
1022    assert(size == calc_size, "incorrect size calculattion");
1023    return size;
1024#ifndef PRODUCT
1025  } else if (!do_size) {
1026    switch (ireg) {
1027    case Op_VecS:
1028      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1029                "popl    [rsp + #%d]",
1030                src_offset, dst_offset);
1031      break;
1032    case Op_VecD:
1033      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1034                "popq    [rsp + #%d]\n\t"
1035                "pushl   [rsp + #%d]\n\t"
1036                "popq    [rsp + #%d]",
1037                src_offset, dst_offset, src_offset+4, dst_offset+4);
1038      break;
1039     case Op_VecX:
1040      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1041                "movdqu  xmm0, [rsp + #%d]\n\t"
1042                "movdqu  [rsp + #%d], xmm0\n\t"
1043                "movdqu  xmm0, [rsp - #16]",
1044                src_offset, dst_offset);
1045      break;
1046    case Op_VecY:
1047      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1048                "vmovdqu xmm0, [rsp + #%d]\n\t"
1049                "vmovdqu [rsp + #%d], xmm0\n\t"
1050                "vmovdqu xmm0, [rsp - #32]",
1051                src_offset, dst_offset);
1052    case Op_VecZ:
1053      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1054                "vmovdqu xmm0, [rsp + #%d]\n\t"
1055                "vmovdqu [rsp + #%d], xmm0\n\t"
1056                "vmovdqu xmm0, [rsp - #64]",
1057                src_offset, dst_offset);
1058      break;
1059    default:
1060      ShouldNotReachHere();
1061    }
1062#endif
1063  }
1064  return calc_size;
1065}
1066
1067uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1068  // Get registers to move
1069  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1070  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1071  OptoReg::Name dst_second = ra_->get_reg_second(this );
1072  OptoReg::Name dst_first = ra_->get_reg_first(this );
1073
1074  enum RC src_second_rc = rc_class(src_second);
1075  enum RC src_first_rc = rc_class(src_first);
1076  enum RC dst_second_rc = rc_class(dst_second);
1077  enum RC dst_first_rc = rc_class(dst_first);
1078
1079  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1080
1081  // Generate spill code!
1082  int size = 0;
1083
1084  if( src_first == dst_first && src_second == dst_second )
1085    return size;            // Self copy, no move
1086
1087  if (bottom_type()->isa_vect() != NULL) {
1088    uint ireg = ideal_reg();
1089    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1090    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1091    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1092    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1093      // mem -> mem
1094      int src_offset = ra_->reg2offset(src_first);
1095      int dst_offset = ra_->reg2offset(dst_first);
1096      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1097    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1098      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1099    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1100      int stack_offset = ra_->reg2offset(dst_first);
1101      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1102    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1103      int stack_offset = ra_->reg2offset(src_first);
1104      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1105    } else {
1106      ShouldNotReachHere();
1107    }
1108  }
1109
1110  // --------------------------------------
1111  // Check for mem-mem move.  push/pop to move.
1112  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1113    if( src_second == dst_first ) { // overlapping stack copy ranges
1114      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1115      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1116      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1117      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1118    }
1119    // move low bits
1120    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1121    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1122    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1123      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1124      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1125    }
1126    return size;
1127  }
1128
1129  // --------------------------------------
1130  // Check for integer reg-reg copy
1131  if( src_first_rc == rc_int && dst_first_rc == rc_int )
1132    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1133
1134  // Check for integer store
1135  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1136    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1137
1138  // Check for integer load
1139  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1140    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1141
1142  // Check for integer reg-xmm reg copy
1143  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1144    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1145            "no 64 bit integer-float reg moves" );
1146    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1147  }
1148  // --------------------------------------
1149  // Check for float reg-reg copy
1150  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1151    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1152            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1153    if( cbuf ) {
1154
1155      // Note the mucking with the register encode to compensate for the 0/1
1156      // indexing issue mentioned in a comment in the reg_def sections
1157      // for FPR registers many lines above here.
1158
1159      if( src_first != FPR1L_num ) {
1160        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1161        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1162        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1163        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1164     } else {
1165        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1166        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1167     }
1168#ifndef PRODUCT
1169    } else if( !do_size ) {
1170      if( size != 0 ) st->print("\n\t");
1171      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1172      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1173#endif
1174    }
1175    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1176  }
1177
1178  // Check for float store
1179  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1180    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1181  }
1182
1183  // Check for float load
1184  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1185    int offset = ra_->reg2offset(src_first);
1186    const char *op_str;
1187    int op;
1188    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1189      op_str = "FLD_D";
1190      op = 0xDD;
1191    } else {                   // 32-bit load
1192      op_str = "FLD_S";
1193      op = 0xD9;
1194      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1195    }
1196    if( cbuf ) {
1197      emit_opcode  (*cbuf, op );
1198      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1199      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1200      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1201#ifndef PRODUCT
1202    } else if( !do_size ) {
1203      if( size != 0 ) st->print("\n\t");
1204      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1205#endif
1206    }
1207    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1208    return size + 3+offset_size+2;
1209  }
1210
1211  // Check for xmm reg-reg copy
1212  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1213    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1214            (src_first+1 == src_second && dst_first+1 == dst_second),
1215            "no non-adjacent float-moves" );
1216    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1217  }
1218
1219  // Check for xmm reg-integer reg copy
1220  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1221    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1222            "no 64 bit float-integer reg moves" );
1223    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1224  }
1225
1226  // Check for xmm store
1227  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1228    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1229  }
1230
1231  // Check for float xmm load
1232  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1233    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1234  }
1235
1236  // Copy from float reg to xmm reg
1237  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1238    // copy to the top of stack from floating point reg
1239    // and use LEA to preserve flags
1240    if( cbuf ) {
1241      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1242      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1243      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1244      emit_d8(*cbuf,0xF8);
1245#ifndef PRODUCT
1246    } else if( !do_size ) {
1247      if( size != 0 ) st->print("\n\t");
1248      st->print("LEA    ESP,[ESP-8]");
1249#endif
1250    }
1251    size += 4;
1252
1253    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1254
1255    // Copy from the temp memory to the xmm reg.
1256    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1257
1258    if( cbuf ) {
1259      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1260      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1261      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1262      emit_d8(*cbuf,0x08);
1263#ifndef PRODUCT
1264    } else if( !do_size ) {
1265      if( size != 0 ) st->print("\n\t");
1266      st->print("LEA    ESP,[ESP+8]");
1267#endif
1268    }
1269    size += 4;
1270    return size;
1271  }
1272
1273  assert( size > 0, "missed a case" );
1274
1275  // --------------------------------------------------------------------
1276  // Check for second bits still needing moving.
1277  if( src_second == dst_second )
1278    return size;               // Self copy; no move
1279  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1280
1281  // Check for second word int-int move
1282  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1283    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1284
1285  // Check for second word integer store
1286  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1287    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1288
1289  // Check for second word integer load
1290  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1291    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1292
1293
1294  Unimplemented();
1295  return 0; // Mute compiler
1296}
1297
1298#ifndef PRODUCT
1299void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1300  implementation( NULL, ra_, false, st );
1301}
1302#endif
1303
1304void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1305  implementation( &cbuf, ra_, false, NULL );
1306}
1307
1308uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1309  return implementation( NULL, ra_, true, NULL );
1310}
1311
1312
1313//=============================================================================
1314#ifndef PRODUCT
1315void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1316  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1317  int reg = ra_->get_reg_first(this);
1318  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1319}
1320#endif
1321
1322void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1323  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1324  int reg = ra_->get_encode(this);
1325  if( offset >= 128 ) {
1326    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327    emit_rm(cbuf, 0x2, reg, 0x04);
1328    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329    emit_d32(cbuf, offset);
1330  }
1331  else {
1332    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1333    emit_rm(cbuf, 0x1, reg, 0x04);
1334    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1335    emit_d8(cbuf, offset);
1336  }
1337}
1338
1339uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1340  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1341  if( offset >= 128 ) {
1342    return 7;
1343  }
1344  else {
1345    return 4;
1346  }
1347}
1348
1349//=============================================================================
1350#ifndef PRODUCT
1351void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1352  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1353  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1354  st->print_cr("\tNOP");
1355  st->print_cr("\tNOP");
1356  if( !OptoBreakpoint )
1357    st->print_cr("\tNOP");
1358}
1359#endif
1360
1361void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1362  MacroAssembler masm(&cbuf);
1363#ifdef ASSERT
1364  uint insts_size = cbuf.insts_size();
1365#endif
1366  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1367  masm.jump_cc(Assembler::notEqual,
1368               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1369  /* WARNING these NOPs are critical so that verified entry point is properly
1370     aligned for patching by NativeJump::patch_verified_entry() */
1371  int nops_cnt = 2;
1372  if( !OptoBreakpoint ) // Leave space for int3
1373     nops_cnt += 1;
1374  masm.nop(nops_cnt);
1375
1376  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1377}
1378
1379uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1380  return OptoBreakpoint ? 11 : 12;
1381}
1382
1383
1384//=============================================================================
1385
1386int Matcher::regnum_to_fpu_offset(int regnum) {
1387  return regnum - 32; // The FP registers are in the second chunk
1388}
1389
1390// This is UltraSparc specific, true just means we have fast l2f conversion
1391const bool Matcher::convL2FSupported(void) {
1392  return true;
1393}
1394
1395// Is this branch offset short enough that a short branch can be used?
1396//
1397// NOTE: If the platform does not provide any short branch variants, then
1398//       this method should return false for offset 0.
1399bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1400  // The passed offset is relative to address of the branch.
1401  // On 86 a branch displacement is calculated relative to address
1402  // of a next instruction.
1403  offset -= br_size;
1404
1405  // the short version of jmpConUCF2 contains multiple branches,
1406  // making the reach slightly less
1407  if (rule == jmpConUCF2_rule)
1408    return (-126 <= offset && offset <= 125);
1409  return (-128 <= offset && offset <= 127);
1410}
1411
1412const bool Matcher::isSimpleConstant64(jlong value) {
1413  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1414  return false;
1415}
1416
1417// The ecx parameter to rep stos for the ClearArray node is in dwords.
1418const bool Matcher::init_array_count_is_in_bytes = false;
1419
1420// Threshold size for cleararray.
1421const int Matcher::init_array_short_size = 8 * BytesPerLong;
1422
1423// Needs 2 CMOV's for longs.
1424const int Matcher::long_cmove_cost() { return 1; }
1425
1426// No CMOVF/CMOVD with SSE/SSE2
1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428
1429// Does the CPU require late expand (see block.cpp for description of late expand)?
1430const bool Matcher::require_postalloc_expand = false;
1431
1432// Should the Matcher clone shifts on addressing modes, expecting them to
1433// be subsumed into complex addressing expressions or compute them into
1434// registers?  True for Intel but false for most RISCs
1435const bool Matcher::clone_shift_expressions = true;
1436
1437// Do we need to mask the count passed to shift instructions or does
1438// the cpu only look at the lower 5/6 bits anyway?
1439const bool Matcher::need_masked_shift_count = false;
1440
1441bool Matcher::narrow_oop_use_complex_address() {
1442  ShouldNotCallThis();
1443  return true;
1444}
1445
1446bool Matcher::narrow_klass_use_complex_address() {
1447  ShouldNotCallThis();
1448  return true;
1449}
1450
1451
1452// Is it better to copy float constants, or load them directly from memory?
1453// Intel can load a float constant from a direct address, requiring no
1454// extra registers.  Most RISCs will have to materialize an address into a
1455// register first, so they would do better to copy the constant from stack.
1456const bool Matcher::rematerialize_float_constants = true;
1457
1458// If CPU can load and store mis-aligned doubles directly then no fixup is
1459// needed.  Else we split the double into 2 integer pieces and move it
1460// piece-by-piece.  Only happens when passing doubles into C code as the
1461// Java calling convention forces doubles to be aligned.
1462const bool Matcher::misaligned_doubles_ok = true;
1463
1464
1465void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1466  // Get the memory operand from the node
1467  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1468  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1469  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1470  uint opcnt     = 1;                 // First operand
1471  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1472  while( idx >= skipped+num_edges ) {
1473    skipped += num_edges;
1474    opcnt++;                          // Bump operand count
1475    assert( opcnt < numopnds, "Accessing non-existent operand" );
1476    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1477  }
1478
1479  MachOper *memory = node->_opnds[opcnt];
1480  MachOper *new_memory = NULL;
1481  switch (memory->opcode()) {
1482  case DIRECT:
1483  case INDOFFSET32X:
1484    // No transformation necessary.
1485    return;
1486  case INDIRECT:
1487    new_memory = new indirect_win95_safeOper( );
1488    break;
1489  case INDOFFSET8:
1490    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1491    break;
1492  case INDOFFSET32:
1493    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1494    break;
1495  case INDINDEXOFFSET:
1496    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1497    break;
1498  case INDINDEXSCALE:
1499    new_memory = new indIndexScale_win95_safeOper(memory->scale());
1500    break;
1501  case INDINDEXSCALEOFFSET:
1502    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1503    break;
1504  case LOAD_LONG_INDIRECT:
1505  case LOAD_LONG_INDOFFSET32:
1506    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1507    return;
1508  default:
1509    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1510    return;
1511  }
1512  node->_opnds[opcnt] = new_memory;
1513}
1514
1515// Advertise here if the CPU requires explicit rounding operations
1516// to implement the UseStrictFP mode.
1517const bool Matcher::strict_fp_requires_explicit_rounding = true;
1518
1519// Are floats conerted to double when stored to stack during deoptimization?
1520// On x32 it is stored with convertion only when FPU is used for floats.
1521bool Matcher::float_in_double() { return (UseSSE == 0); }
1522
1523// Do ints take an entire long register or just half?
1524const bool Matcher::int_in_long = false;
1525
1526// Return whether or not this register is ever used as an argument.  This
1527// function is used on startup to build the trampoline stubs in generateOptoStub.
1528// Registers not mentioned will be killed by the VM call in the trampoline, and
1529// arguments in those registers not be available to the callee.
1530bool Matcher::can_be_java_arg( int reg ) {
1531  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1532  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1533  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1534  return false;
1535}
1536
1537bool Matcher::is_spillable_arg( int reg ) {
1538  return can_be_java_arg(reg);
1539}
1540
1541bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1542  // Use hardware integer DIV instruction when
1543  // it is faster than a code which use multiply.
1544  // Only when constant divisor fits into 32 bit
1545  // (min_jint is excluded to get only correct
1546  // positive 32 bit values from negative).
1547  return VM_Version::has_fast_idiv() &&
1548         (divisor == (int)divisor && divisor != min_jint);
1549}
1550
1551// Register for DIVI projection of divmodI
1552RegMask Matcher::divI_proj_mask() {
1553  return EAX_REG_mask();
1554}
1555
1556// Register for MODI projection of divmodI
1557RegMask Matcher::modI_proj_mask() {
1558  return EDX_REG_mask();
1559}
1560
1561// Register for DIVL projection of divmodL
1562RegMask Matcher::divL_proj_mask() {
1563  ShouldNotReachHere();
1564  return RegMask();
1565}
1566
1567// Register for MODL projection of divmodL
1568RegMask Matcher::modL_proj_mask() {
1569  ShouldNotReachHere();
1570  return RegMask();
1571}
1572
1573const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1574  return NO_REG_mask();
1575}
1576
1577// Returns true if the high 32 bits of the value is known to be zero.
1578bool is_operand_hi32_zero(Node* n) {
1579  int opc = n->Opcode();
1580  if (opc == Op_AndL) {
1581    Node* o2 = n->in(2);
1582    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1583      return true;
1584    }
1585  }
1586  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587    return true;
1588  }
1589  return false;
1590}
1591
1592%}
1593
1594//----------ENCODING BLOCK-----------------------------------------------------
1595// This block specifies the encoding classes used by the compiler to output
1596// byte streams.  Encoding classes generate functions which are called by
1597// Machine Instruction Nodes in order to generate the bit encoding of the
1598// instruction.  Operands specify their base encoding interface with the
1599// interface keyword.  There are currently supported four interfaces,
1600// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1601// operand to generate a function which returns its register number when
1602// queried.   CONST_INTER causes an operand to generate a function which
1603// returns the value of the constant when queried.  MEMORY_INTER causes an
1604// operand to generate four functions which return the Base Register, the
1605// Index Register, the Scale Value, and the Offset Value of the operand when
1606// queried.  COND_INTER causes an operand to generate six functions which
1607// return the encoding code (ie - encoding bits for the instruction)
1608// associated with each basic boolean condition for a conditional instruction.
1609// Instructions specify two basic values for encoding.  They use the
1610// ins_encode keyword to specify their encoding class (which must be one of
1611// the class names specified in the encoding block), and they use the
1612// opcode keyword to specify, in order, their primary, secondary, and
1613// tertiary opcode.  Only the opcode sections which a particular instruction
1614// needs for encoding need to be specified.
1615encode %{
1616  // Build emit functions for each basic byte or larger field in the intel
1617  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1618  // code in the enc_class source block.  Emit functions will live in the
1619  // main source block for now.  In future, we can generalize this by
1620  // adding a syntax that specifies the sizes of fields in an order,
1621  // so that the adlc can build the emit functions automagically
1622
1623  // Emit primary opcode
1624  enc_class OpcP %{
1625    emit_opcode(cbuf, $primary);
1626  %}
1627
1628  // Emit secondary opcode
1629  enc_class OpcS %{
1630    emit_opcode(cbuf, $secondary);
1631  %}
1632
1633  // Emit opcode directly
1634  enc_class Opcode(immI d8) %{
1635    emit_opcode(cbuf, $d8$$constant);
1636  %}
1637
1638  enc_class SizePrefix %{
1639    emit_opcode(cbuf,0x66);
1640  %}
1641
1642  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1643    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1644  %}
1645
1646  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1647    emit_opcode(cbuf,$opcode$$constant);
1648    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1649  %}
1650
1651  enc_class mov_r32_imm0( rRegI dst ) %{
1652    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1653    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1654  %}
1655
1656  enc_class cdq_enc %{
1657    // Full implementation of Java idiv and irem; checks for
1658    // special case as described in JVM spec., p.243 & p.271.
1659    //
1660    //         normal case                           special case
1661    //
1662    // input : rax,: dividend                         min_int
1663    //         reg: divisor                          -1
1664    //
1665    // output: rax,: quotient  (= rax, idiv reg)       min_int
1666    //         rdx: remainder (= rax, irem reg)       0
1667    //
1668    //  Code sequnce:
1669    //
1670    //  81 F8 00 00 00 80    cmp         rax,80000000h
1671    //  0F 85 0B 00 00 00    jne         normal_case
1672    //  33 D2                xor         rdx,edx
1673    //  83 F9 FF             cmp         rcx,0FFh
1674    //  0F 84 03 00 00 00    je          done
1675    //                  normal_case:
1676    //  99                   cdq
1677    //  F7 F9                idiv        rax,ecx
1678    //                  done:
1679    //
1680    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1681    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1682    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1683    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1684    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1685    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1686    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1687    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1688    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1689    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1690    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1691    // normal_case:
1692    emit_opcode(cbuf,0x99);                                         // cdq
1693    // idiv (note: must be emitted by the user of this rule)
1694    // normal:
1695  %}
1696
1697  // Dense encoding for older common ops
1698  enc_class Opc_plus(immI opcode, rRegI reg) %{
1699    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1700  %}
1701
1702
1703  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1704  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1705    // Check for 8-bit immediate, and set sign extend bit in opcode
1706    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1707      emit_opcode(cbuf, $primary | 0x02);
1708    }
1709    else {                          // If 32-bit immediate
1710      emit_opcode(cbuf, $primary);
1711    }
1712  %}
1713
1714  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1715    // Emit primary opcode and set sign-extend bit
1716    // Check for 8-bit immediate, and set sign extend bit in opcode
1717    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1718      emit_opcode(cbuf, $primary | 0x02);    }
1719    else {                          // If 32-bit immediate
1720      emit_opcode(cbuf, $primary);
1721    }
1722    // Emit r/m byte with secondary opcode, after primary opcode.
1723    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1724  %}
1725
1726  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1727    // Check for 8-bit immediate, and set sign extend bit in opcode
1728    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1729      $$$emit8$imm$$constant;
1730    }
1731    else {                          // If 32-bit immediate
1732      // Output immediate
1733      $$$emit32$imm$$constant;
1734    }
1735  %}
1736
1737  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1738    // Emit primary opcode and set sign-extend bit
1739    // Check for 8-bit immediate, and set sign extend bit in opcode
1740    int con = (int)$imm$$constant; // Throw away top bits
1741    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1742    // Emit r/m byte with secondary opcode, after primary opcode.
1743    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1744    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1745    else                               emit_d32(cbuf,con);
1746  %}
1747
1748  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1749    // Emit primary opcode and set sign-extend bit
1750    // Check for 8-bit immediate, and set sign extend bit in opcode
1751    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1752    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1753    // Emit r/m byte with tertiary opcode, after primary opcode.
1754    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1755    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1756    else                               emit_d32(cbuf,con);
1757  %}
1758
1759  enc_class OpcSReg (rRegI dst) %{    // BSWAP
1760    emit_cc(cbuf, $secondary, $dst$$reg );
1761  %}
1762
1763  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1764    int destlo = $dst$$reg;
1765    int desthi = HIGH_FROM_LOW(destlo);
1766    // bswap lo
1767    emit_opcode(cbuf, 0x0F);
1768    emit_cc(cbuf, 0xC8, destlo);
1769    // bswap hi
1770    emit_opcode(cbuf, 0x0F);
1771    emit_cc(cbuf, 0xC8, desthi);
1772    // xchg lo and hi
1773    emit_opcode(cbuf, 0x87);
1774    emit_rm(cbuf, 0x3, destlo, desthi);
1775  %}
1776
1777  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1778    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1779  %}
1780
1781  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782    $$$emit8$primary;
1783    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784  %}
1785
1786  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1787    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788    emit_d8(cbuf, op >> 8 );
1789    emit_d8(cbuf, op & 255);
1790  %}
1791
1792  // emulate a CMOV with a conditional branch around a MOV
1793  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794    // Invert sense of branch from sense of CMOV
1795    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796    emit_d8( cbuf, $brOffs$$constant );
1797  %}
1798
1799  enc_class enc_PartialSubtypeCheck( ) %{
1800    Register Redi = as_Register(EDI_enc); // result register
1801    Register Reax = as_Register(EAX_enc); // super class
1802    Register Recx = as_Register(ECX_enc); // killed
1803    Register Resi = as_Register(ESI_enc); // sub class
1804    Label miss;
1805
1806    MacroAssembler _masm(&cbuf);
1807    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808                                     NULL, &miss,
1809                                     /*set_cond_codes:*/ true);
1810    if ($primary) {
1811      __ xorptr(Redi, Redi);
1812    }
1813    __ bind(miss);
1814  %}
1815
1816  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1817    MacroAssembler masm(&cbuf);
1818    int start = masm.offset();
1819    if (UseSSE >= 2) {
1820      if (VerifyFPU) {
1821        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822      }
1823    } else {
1824      // External c_calling_convention expects the FPU stack to be 'clean'.
1825      // Compiled code leaves it dirty.  Do cleanup now.
1826      masm.empty_FPU_stack();
1827    }
1828    if (sizeof_FFree_Float_Stack_All == -1) {
1829      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830    } else {
1831      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832    }
1833  %}
1834
1835  enc_class Verify_FPU_For_Leaf %{
1836    if( VerifyFPU ) {
1837      MacroAssembler masm(&cbuf);
1838      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839    }
1840  %}
1841
1842  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843    // This is the instruction starting address for relocation info.
1844    cbuf.set_insts_mark();
1845    $$$emit8$primary;
1846    // CALL directly to the runtime
1847    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848                runtime_call_Relocation::spec(), RELOC_IMM32 );
1849
1850    if (UseSSE >= 2) {
1851      MacroAssembler _masm(&cbuf);
1852      BasicType rt = tf()->return_type();
1853
1854      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855        // A C runtime call where the return value is unused.  In SSE2+
1856        // mode the result needs to be removed from the FPU stack.  It's
1857        // likely that this function call could be removed by the
1858        // optimizer if the C function is a pure function.
1859        __ ffree(0);
1860      } else if (rt == T_FLOAT) {
1861        __ lea(rsp, Address(rsp, -4));
1862        __ fstp_s(Address(rsp, 0));
1863        __ movflt(xmm0, Address(rsp, 0));
1864        __ lea(rsp, Address(rsp,  4));
1865      } else if (rt == T_DOUBLE) {
1866        __ lea(rsp, Address(rsp, -8));
1867        __ fstp_d(Address(rsp, 0));
1868        __ movdbl(xmm0, Address(rsp, 0));
1869        __ lea(rsp, Address(rsp,  8));
1870      }
1871    }
1872  %}
1873
1874
1875  enc_class pre_call_resets %{
1876    // If method sets FPU control word restore it here
1877    debug_only(int off0 = cbuf.insts_size());
1878    if (ra_->C->in_24_bit_fp_mode()) {
1879      MacroAssembler _masm(&cbuf);
1880      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881    }
1882    if (ra_->C->max_vector_size() > 16) {
1883      // Clear upper bits of YMM registers when current compiled code uses
1884      // wide vectors to avoid AVX <-> SSE transition penalty during call.
1885      MacroAssembler _masm(&cbuf);
1886      __ vzeroupper();
1887    }
1888    debug_only(int off1 = cbuf.insts_size());
1889    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1890  %}
1891
1892  enc_class post_call_FPU %{
1893    // If method sets FPU control word do it here also
1894    if (Compile::current()->in_24_bit_fp_mode()) {
1895      MacroAssembler masm(&cbuf);
1896      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1897    }
1898  %}
1899
1900  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1901    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1902    // who we intended to call.
1903    cbuf.set_insts_mark();
1904    $$$emit8$primary;
1905    if (!_method) {
1906      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1907                     runtime_call_Relocation::spec(), RELOC_IMM32 );
1908    } else if (_optimized_virtual) {
1909      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1910                     opt_virtual_call_Relocation::spec(), RELOC_IMM32 );
1911    } else {
1912      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1913                     static_call_Relocation::spec(), RELOC_IMM32 );
1914    }
1915    if (_method) {  // Emit stub for static call.
1916      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1917      if (stub == NULL) {
1918        ciEnv::current()->record_failure("CodeCache is full");
1919        return;
1920      }
1921    }
1922  %}
1923
1924  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1925    MacroAssembler _masm(&cbuf);
1926    __ ic_call((address)$meth$$method);
1927  %}
1928
1929  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1930    int disp = in_bytes(Method::from_compiled_offset());
1931    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1932
1933    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1934    cbuf.set_insts_mark();
1935    $$$emit8$primary;
1936    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1937    emit_d8(cbuf, disp);             // Displacement
1938
1939  %}
1940
1941//   Following encoding is no longer used, but may be restored if calling
1942//   convention changes significantly.
1943//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1944//
1945//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1946//     // int ic_reg     = Matcher::inline_cache_reg();
1947//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1948//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1949//     // int imo_encode = Matcher::_regEncode[imo_reg];
1950//
1951//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1952//     // // so we load it immediately before the call
1953//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1954//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1955//
1956//     // xor rbp,ebp
1957//     emit_opcode(cbuf, 0x33);
1958//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1959//
1960//     // CALL to interpreter.
1961//     cbuf.set_insts_mark();
1962//     $$$emit8$primary;
1963//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1964//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1965//   %}
1966
1967  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1968    $$$emit8$primary;
1969    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1970    $$$emit8$shift$$constant;
1971  %}
1972
1973  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1974    // Load immediate does not have a zero or sign extended version
1975    // for 8-bit immediates
1976    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1977    $$$emit32$src$$constant;
1978  %}
1979
1980  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1981    // Load immediate does not have a zero or sign extended version
1982    // for 8-bit immediates
1983    emit_opcode(cbuf, $primary + $dst$$reg);
1984    $$$emit32$src$$constant;
1985  %}
1986
1987  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1988    // Load immediate does not have a zero or sign extended version
1989    // for 8-bit immediates
1990    int dst_enc = $dst$$reg;
1991    int src_con = $src$$constant & 0x0FFFFFFFFL;
1992    if (src_con == 0) {
1993      // xor dst, dst
1994      emit_opcode(cbuf, 0x33);
1995      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1996    } else {
1997      emit_opcode(cbuf, $primary + dst_enc);
1998      emit_d32(cbuf, src_con);
1999    }
2000  %}
2001
2002  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2003    // Load immediate does not have a zero or sign extended version
2004    // for 8-bit immediates
2005    int dst_enc = $dst$$reg + 2;
2006    int src_con = ((julong)($src$$constant)) >> 32;
2007    if (src_con == 0) {
2008      // xor dst, dst
2009      emit_opcode(cbuf, 0x33);
2010      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2011    } else {
2012      emit_opcode(cbuf, $primary + dst_enc);
2013      emit_d32(cbuf, src_con);
2014    }
2015  %}
2016
2017
2018  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2019  enc_class enc_Copy( rRegI dst, rRegI src ) %{
2020    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2021  %}
2022
2023  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2024    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2025  %}
2026
2027  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2028    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2029  %}
2030
2031  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2032    $$$emit8$primary;
2033    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2034  %}
2035
2036  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2037    $$$emit8$secondary;
2038    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2039  %}
2040
2041  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2042    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2043  %}
2044
2045  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2046    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2047  %}
2048
2049  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2050    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2051  %}
2052
2053  enc_class Con32 (immI src) %{    // Con32(storeImmI)
2054    // Output immediate
2055    $$$emit32$src$$constant;
2056  %}
2057
2058  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2059    // Output Float immediate bits
2060    jfloat jf = $src$$constant;
2061    int    jf_as_bits = jint_cast( jf );
2062    emit_d32(cbuf, jf_as_bits);
2063  %}
2064
2065  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2066    // Output Float immediate bits
2067    jfloat jf = $src$$constant;
2068    int    jf_as_bits = jint_cast( jf );
2069    emit_d32(cbuf, jf_as_bits);
2070  %}
2071
2072  enc_class Con16 (immI src) %{    // Con16(storeImmI)
2073    // Output immediate
2074    $$$emit16$src$$constant;
2075  %}
2076
2077  enc_class Con_d32(immI src) %{
2078    emit_d32(cbuf,$src$$constant);
2079  %}
2080
2081  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2082    // Output immediate memory reference
2083    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2084    emit_d32(cbuf, 0x00);
2085  %}
2086
2087  enc_class lock_prefix( ) %{
2088    if( os::is_MP() )
2089      emit_opcode(cbuf,0xF0);         // [Lock]
2090  %}
2091
2092  // Cmp-xchg long value.
2093  // Note: we need to swap rbx, and rcx before and after the
2094  //       cmpxchg8 instruction because the instruction uses
2095  //       rcx as the high order word of the new value to store but
2096  //       our register encoding uses rbx,.
2097  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2098
2099    // XCHG  rbx,ecx
2100    emit_opcode(cbuf,0x87);
2101    emit_opcode(cbuf,0xD9);
2102    // [Lock]
2103    if( os::is_MP() )
2104      emit_opcode(cbuf,0xF0);
2105    // CMPXCHG8 [Eptr]
2106    emit_opcode(cbuf,0x0F);
2107    emit_opcode(cbuf,0xC7);
2108    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2109    // XCHG  rbx,ecx
2110    emit_opcode(cbuf,0x87);
2111    emit_opcode(cbuf,0xD9);
2112  %}
2113
2114  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2115    // [Lock]
2116    if( os::is_MP() )
2117      emit_opcode(cbuf,0xF0);
2118
2119    // CMPXCHG [Eptr]
2120    emit_opcode(cbuf,0x0F);
2121    emit_opcode(cbuf,0xB1);
2122    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2123  %}
2124
2125  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2126    int res_encoding = $res$$reg;
2127
2128    // MOV  res,0
2129    emit_opcode( cbuf, 0xB8 + res_encoding);
2130    emit_d32( cbuf, 0 );
2131    // JNE,s  fail
2132    emit_opcode(cbuf,0x75);
2133    emit_d8(cbuf, 5 );
2134    // MOV  res,1
2135    emit_opcode( cbuf, 0xB8 + res_encoding);
2136    emit_d32( cbuf, 1 );
2137    // fail:
2138  %}
2139
2140  enc_class set_instruction_start( ) %{
2141    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2142  %}
2143
2144  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2145    int reg_encoding = $ereg$$reg;
2146    int base  = $mem$$base;
2147    int index = $mem$$index;
2148    int scale = $mem$$scale;
2149    int displace = $mem$$disp;
2150    relocInfo::relocType disp_reloc = $mem->disp_reloc();
2151    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2152  %}
2153
2154  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2155    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2156    int base  = $mem$$base;
2157    int index = $mem$$index;
2158    int scale = $mem$$scale;
2159    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2160    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2161    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2162  %}
2163
2164  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2165    int r1, r2;
2166    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2167    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2168    emit_opcode(cbuf,0x0F);
2169    emit_opcode(cbuf,$tertiary);
2170    emit_rm(cbuf, 0x3, r1, r2);
2171    emit_d8(cbuf,$cnt$$constant);
2172    emit_d8(cbuf,$primary);
2173    emit_rm(cbuf, 0x3, $secondary, r1);
2174    emit_d8(cbuf,$cnt$$constant);
2175  %}
2176
2177  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2178    emit_opcode( cbuf, 0x8B ); // Move
2179    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2180    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2181      emit_d8(cbuf,$primary);
2182      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2183      emit_d8(cbuf,$cnt$$constant-32);
2184    }
2185    emit_d8(cbuf,$primary);
2186    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2187    emit_d8(cbuf,31);
2188  %}
2189
2190  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2191    int r1, r2;
2192    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2193    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2194
2195    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2196    emit_rm(cbuf, 0x3, r1, r2);
2197    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2198      emit_opcode(cbuf,$primary);
2199      emit_rm(cbuf, 0x3, $secondary, r1);
2200      emit_d8(cbuf,$cnt$$constant-32);
2201    }
2202    emit_opcode(cbuf,0x33);  // XOR r2,r2
2203    emit_rm(cbuf, 0x3, r2, r2);
2204  %}
2205
2206  // Clone of RegMem but accepts an extra parameter to access each
2207  // half of a double in memory; it never needs relocation info.
2208  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2209    emit_opcode(cbuf,$opcode$$constant);
2210    int reg_encoding = $rm_reg$$reg;
2211    int base     = $mem$$base;
2212    int index    = $mem$$index;
2213    int scale    = $mem$$scale;
2214    int displace = $mem$$disp + $disp_for_half$$constant;
2215    relocInfo::relocType disp_reloc = relocInfo::none;
2216    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2217  %}
2218
2219  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2220  //
2221  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2222  // and it never needs relocation information.
2223  // Frequently used to move data between FPU's Stack Top and memory.
2224  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2225    int rm_byte_opcode = $rm_opcode$$constant;
2226    int base     = $mem$$base;
2227    int index    = $mem$$index;
2228    int scale    = $mem$$scale;
2229    int displace = $mem$$disp;
2230    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2231    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2232  %}
2233
2234  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2235    int rm_byte_opcode = $rm_opcode$$constant;
2236    int base     = $mem$$base;
2237    int index    = $mem$$index;
2238    int scale    = $mem$$scale;
2239    int displace = $mem$$disp;
2240    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2241    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2242  %}
2243
2244  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2245    int reg_encoding = $dst$$reg;
2246    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2247    int index        = 0x04;            // 0x04 indicates no index
2248    int scale        = 0x00;            // 0x00 indicates no scale
2249    int displace     = $src1$$constant; // 0x00 indicates no displacement
2250    relocInfo::relocType disp_reloc = relocInfo::none;
2251    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2252  %}
2253
2254  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2255    // Compare dst,src
2256    emit_opcode(cbuf,0x3B);
2257    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2258    // jmp dst < src around move
2259    emit_opcode(cbuf,0x7C);
2260    emit_d8(cbuf,2);
2261    // move dst,src
2262    emit_opcode(cbuf,0x8B);
2263    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2264  %}
2265
2266  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2267    // Compare dst,src
2268    emit_opcode(cbuf,0x3B);
2269    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2270    // jmp dst > src around move
2271    emit_opcode(cbuf,0x7F);
2272    emit_d8(cbuf,2);
2273    // move dst,src
2274    emit_opcode(cbuf,0x8B);
2275    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2276  %}
2277
2278  enc_class enc_FPR_store(memory mem, regDPR src) %{
2279    // If src is FPR1, we can just FST to store it.
2280    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2281    int reg_encoding = 0x2; // Just store
2282    int base  = $mem$$base;
2283    int index = $mem$$index;
2284    int scale = $mem$$scale;
2285    int displace = $mem$$disp;
2286    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2287    if( $src$$reg != FPR1L_enc ) {
2288      reg_encoding = 0x3;  // Store & pop
2289      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2290      emit_d8( cbuf, 0xC0-1+$src$$reg );
2291    }
2292    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2293    emit_opcode(cbuf,$primary);
2294    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2295  %}
2296
2297  enc_class neg_reg(rRegI dst) %{
2298    // NEG $dst
2299    emit_opcode(cbuf,0xF7);
2300    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2301  %}
2302
2303  enc_class setLT_reg(eCXRegI dst) %{
2304    // SETLT $dst
2305    emit_opcode(cbuf,0x0F);
2306    emit_opcode(cbuf,0x9C);
2307    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2308  %}
2309
2310  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2311    int tmpReg = $tmp$$reg;
2312
2313    // SUB $p,$q
2314    emit_opcode(cbuf,0x2B);
2315    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2316    // SBB $tmp,$tmp
2317    emit_opcode(cbuf,0x1B);
2318    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2319    // AND $tmp,$y
2320    emit_opcode(cbuf,0x23);
2321    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2322    // ADD $p,$tmp
2323    emit_opcode(cbuf,0x03);
2324    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2325  %}
2326
2327  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2328    // TEST shift,32
2329    emit_opcode(cbuf,0xF7);
2330    emit_rm(cbuf, 0x3, 0, ECX_enc);
2331    emit_d32(cbuf,0x20);
2332    // JEQ,s small
2333    emit_opcode(cbuf, 0x74);
2334    emit_d8(cbuf, 0x04);
2335    // MOV    $dst.hi,$dst.lo
2336    emit_opcode( cbuf, 0x8B );
2337    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2338    // CLR    $dst.lo
2339    emit_opcode(cbuf, 0x33);
2340    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2341// small:
2342    // SHLD   $dst.hi,$dst.lo,$shift
2343    emit_opcode(cbuf,0x0F);
2344    emit_opcode(cbuf,0xA5);
2345    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2346    // SHL    $dst.lo,$shift"
2347    emit_opcode(cbuf,0xD3);
2348    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2349  %}
2350
2351  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2352    // TEST shift,32
2353    emit_opcode(cbuf,0xF7);
2354    emit_rm(cbuf, 0x3, 0, ECX_enc);
2355    emit_d32(cbuf,0x20);
2356    // JEQ,s small
2357    emit_opcode(cbuf, 0x74);
2358    emit_d8(cbuf, 0x04);
2359    // MOV    $dst.lo,$dst.hi
2360    emit_opcode( cbuf, 0x8B );
2361    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2362    // CLR    $dst.hi
2363    emit_opcode(cbuf, 0x33);
2364    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2365// small:
2366    // SHRD   $dst.lo,$dst.hi,$shift
2367    emit_opcode(cbuf,0x0F);
2368    emit_opcode(cbuf,0xAD);
2369    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2370    // SHR    $dst.hi,$shift"
2371    emit_opcode(cbuf,0xD3);
2372    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2373  %}
2374
2375  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2376    // TEST shift,32
2377    emit_opcode(cbuf,0xF7);
2378    emit_rm(cbuf, 0x3, 0, ECX_enc);
2379    emit_d32(cbuf,0x20);
2380    // JEQ,s small
2381    emit_opcode(cbuf, 0x74);
2382    emit_d8(cbuf, 0x05);
2383    // MOV    $dst.lo,$dst.hi
2384    emit_opcode( cbuf, 0x8B );
2385    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2386    // SAR    $dst.hi,31
2387    emit_opcode(cbuf, 0xC1);
2388    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2389    emit_d8(cbuf, 0x1F );
2390// small:
2391    // SHRD   $dst.lo,$dst.hi,$shift
2392    emit_opcode(cbuf,0x0F);
2393    emit_opcode(cbuf,0xAD);
2394    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2395    // SAR    $dst.hi,$shift"
2396    emit_opcode(cbuf,0xD3);
2397    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2398  %}
2399
2400
2401  // ----------------- Encodings for floating point unit -----------------
2402  // May leave result in FPU-TOS or FPU reg depending on opcodes
2403  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2404    $$$emit8$primary;
2405    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2406  %}
2407
2408  // Pop argument in FPR0 with FSTP ST(0)
2409  enc_class PopFPU() %{
2410    emit_opcode( cbuf, 0xDD );
2411    emit_d8( cbuf, 0xD8 );
2412  %}
2413
2414  // !!!!! equivalent to Pop_Reg_F
2415  enc_class Pop_Reg_DPR( regDPR dst ) %{
2416    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2417    emit_d8( cbuf, 0xD8+$dst$$reg );
2418  %}
2419
2420  enc_class Push_Reg_DPR( regDPR dst ) %{
2421    emit_opcode( cbuf, 0xD9 );
2422    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2423  %}
2424
2425  enc_class strictfp_bias1( regDPR dst ) %{
2426    emit_opcode( cbuf, 0xDB );           // FLD m80real
2427    emit_opcode( cbuf, 0x2D );
2428    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2429    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2430    emit_opcode( cbuf, 0xC8+$dst$$reg );
2431  %}
2432
2433  enc_class strictfp_bias2( regDPR dst ) %{
2434    emit_opcode( cbuf, 0xDB );           // FLD m80real
2435    emit_opcode( cbuf, 0x2D );
2436    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2437    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2438    emit_opcode( cbuf, 0xC8+$dst$$reg );
2439  %}
2440
2441  // Special case for moving an integer register to a stack slot.
2442  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2443    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2444  %}
2445
2446  // Special case for moving a register to a stack slot.
2447  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2448    // Opcode already emitted
2449    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2450    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2451    emit_d32(cbuf, $dst$$disp);   // Displacement
2452  %}
2453
2454  // Push the integer in stackSlot 'src' onto FP-stack
2455  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2456    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2457  %}
2458
2459  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2460  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2461    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2462  %}
2463
2464  // Same as Pop_Mem_F except for opcode
2465  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2466  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2467    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2468  %}
2469
2470  enc_class Pop_Reg_FPR( regFPR dst ) %{
2471    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2472    emit_d8( cbuf, 0xD8+$dst$$reg );
2473  %}
2474
2475  enc_class Push_Reg_FPR( regFPR dst ) %{
2476    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2477    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2478  %}
2479
2480  // Push FPU's float to a stack-slot, and pop FPU-stack
2481  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2482    int pop = 0x02;
2483    if ($src$$reg != FPR1L_enc) {
2484      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2485      emit_d8( cbuf, 0xC0-1+$src$$reg );
2486      pop = 0x03;
2487    }
2488    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2489  %}
2490
2491  // Push FPU's double to a stack-slot, and pop FPU-stack
2492  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2493    int pop = 0x02;
2494    if ($src$$reg != FPR1L_enc) {
2495      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2496      emit_d8( cbuf, 0xC0-1+$src$$reg );
2497      pop = 0x03;
2498    }
2499    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2500  %}
2501
2502  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2503  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2504    int pop = 0xD0 - 1; // -1 since we skip FLD
2505    if ($src$$reg != FPR1L_enc) {
2506      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2507      emit_d8( cbuf, 0xC0-1+$src$$reg );
2508      pop = 0xD8;
2509    }
2510    emit_opcode( cbuf, 0xDD );
2511    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2512  %}
2513
2514
2515  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2516    // load dst in FPR0
2517    emit_opcode( cbuf, 0xD9 );
2518    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2519    if ($src$$reg != FPR1L_enc) {
2520      // fincstp
2521      emit_opcode (cbuf, 0xD9);
2522      emit_opcode (cbuf, 0xF7);
2523      // swap src with FPR1:
2524      // FXCH FPR1 with src
2525      emit_opcode(cbuf, 0xD9);
2526      emit_d8(cbuf, 0xC8-1+$src$$reg );
2527      // fdecstp
2528      emit_opcode (cbuf, 0xD9);
2529      emit_opcode (cbuf, 0xF6);
2530    }
2531  %}
2532
2533  enc_class Push_ModD_encoding(regD src0, regD src1) %{
2534    MacroAssembler _masm(&cbuf);
2535    __ subptr(rsp, 8);
2536    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2537    __ fld_d(Address(rsp, 0));
2538    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2539    __ fld_d(Address(rsp, 0));
2540  %}
2541
2542  enc_class Push_ModF_encoding(regF src0, regF src1) %{
2543    MacroAssembler _masm(&cbuf);
2544    __ subptr(rsp, 4);
2545    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2546    __ fld_s(Address(rsp, 0));
2547    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2548    __ fld_s(Address(rsp, 0));
2549  %}
2550
2551  enc_class Push_ResultD(regD dst) %{
2552    MacroAssembler _masm(&cbuf);
2553    __ fstp_d(Address(rsp, 0));
2554    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2555    __ addptr(rsp, 8);
2556  %}
2557
2558  enc_class Push_ResultF(regF dst, immI d8) %{
2559    MacroAssembler _masm(&cbuf);
2560    __ fstp_s(Address(rsp, 0));
2561    __ movflt($dst$$XMMRegister, Address(rsp, 0));
2562    __ addptr(rsp, $d8$$constant);
2563  %}
2564
2565  enc_class Push_SrcD(regD src) %{
2566    MacroAssembler _masm(&cbuf);
2567    __ subptr(rsp, 8);
2568    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2569    __ fld_d(Address(rsp, 0));
2570  %}
2571
2572  enc_class push_stack_temp_qword() %{
2573    MacroAssembler _masm(&cbuf);
2574    __ subptr(rsp, 8);
2575  %}
2576
2577  enc_class pop_stack_temp_qword() %{
2578    MacroAssembler _masm(&cbuf);
2579    __ addptr(rsp, 8);
2580  %}
2581
2582  enc_class push_xmm_to_fpr1(regD src) %{
2583    MacroAssembler _masm(&cbuf);
2584    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2585    __ fld_d(Address(rsp, 0));
2586  %}
2587
2588  enc_class Push_Result_Mod_DPR( regDPR src) %{
2589    if ($src$$reg != FPR1L_enc) {
2590      // fincstp
2591      emit_opcode (cbuf, 0xD9);
2592      emit_opcode (cbuf, 0xF7);
2593      // FXCH FPR1 with src
2594      emit_opcode(cbuf, 0xD9);
2595      emit_d8(cbuf, 0xC8-1+$src$$reg );
2596      // fdecstp
2597      emit_opcode (cbuf, 0xD9);
2598      emit_opcode (cbuf, 0xF6);
2599    }
2600    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2601    // // FSTP   FPR$dst$$reg
2602    // emit_opcode( cbuf, 0xDD );
2603    // emit_d8( cbuf, 0xD8+$dst$$reg );
2604  %}
2605
2606  enc_class fnstsw_sahf_skip_parity() %{
2607    // fnstsw ax
2608    emit_opcode( cbuf, 0xDF );
2609    emit_opcode( cbuf, 0xE0 );
2610    // sahf
2611    emit_opcode( cbuf, 0x9E );
2612    // jnp  ::skip
2613    emit_opcode( cbuf, 0x7B );
2614    emit_opcode( cbuf, 0x05 );
2615  %}
2616
2617  enc_class emitModDPR() %{
2618    // fprem must be iterative
2619    // :: loop
2620    // fprem
2621    emit_opcode( cbuf, 0xD9 );
2622    emit_opcode( cbuf, 0xF8 );
2623    // wait
2624    emit_opcode( cbuf, 0x9b );
2625    // fnstsw ax
2626    emit_opcode( cbuf, 0xDF );
2627    emit_opcode( cbuf, 0xE0 );
2628    // sahf
2629    emit_opcode( cbuf, 0x9E );
2630    // jp  ::loop
2631    emit_opcode( cbuf, 0x0F );
2632    emit_opcode( cbuf, 0x8A );
2633    emit_opcode( cbuf, 0xF4 );
2634    emit_opcode( cbuf, 0xFF );
2635    emit_opcode( cbuf, 0xFF );
2636    emit_opcode( cbuf, 0xFF );
2637  %}
2638
2639  enc_class fpu_flags() %{
2640    // fnstsw_ax
2641    emit_opcode( cbuf, 0xDF);
2642    emit_opcode( cbuf, 0xE0);
2643    // test ax,0x0400
2644    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2645    emit_opcode( cbuf, 0xA9 );
2646    emit_d16   ( cbuf, 0x0400 );
2647    // // // This sequence works, but stalls for 12-16 cycles on PPro
2648    // // test rax,0x0400
2649    // emit_opcode( cbuf, 0xA9 );
2650    // emit_d32   ( cbuf, 0x00000400 );
2651    //
2652    // jz exit (no unordered comparison)
2653    emit_opcode( cbuf, 0x74 );
2654    emit_d8    ( cbuf, 0x02 );
2655    // mov ah,1 - treat as LT case (set carry flag)
2656    emit_opcode( cbuf, 0xB4 );
2657    emit_d8    ( cbuf, 0x01 );
2658    // sahf
2659    emit_opcode( cbuf, 0x9E);
2660  %}
2661
2662  enc_class cmpF_P6_fixup() %{
2663    // Fixup the integer flags in case comparison involved a NaN
2664    //
2665    // JNP exit (no unordered comparison, P-flag is set by NaN)
2666    emit_opcode( cbuf, 0x7B );
2667    emit_d8    ( cbuf, 0x03 );
2668    // MOV AH,1 - treat as LT case (set carry flag)
2669    emit_opcode( cbuf, 0xB4 );
2670    emit_d8    ( cbuf, 0x01 );
2671    // SAHF
2672    emit_opcode( cbuf, 0x9E);
2673    // NOP     // target for branch to avoid branch to branch
2674    emit_opcode( cbuf, 0x90);
2675  %}
2676
2677//     fnstsw_ax();
2678//     sahf();
2679//     movl(dst, nan_result);
2680//     jcc(Assembler::parity, exit);
2681//     movl(dst, less_result);
2682//     jcc(Assembler::below, exit);
2683//     movl(dst, equal_result);
2684//     jcc(Assembler::equal, exit);
2685//     movl(dst, greater_result);
2686
2687// less_result     =  1;
2688// greater_result  = -1;
2689// equal_result    = 0;
2690// nan_result      = -1;
2691
2692  enc_class CmpF_Result(rRegI dst) %{
2693    // fnstsw_ax();
2694    emit_opcode( cbuf, 0xDF);
2695    emit_opcode( cbuf, 0xE0);
2696    // sahf
2697    emit_opcode( cbuf, 0x9E);
2698    // movl(dst, nan_result);
2699    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2700    emit_d32( cbuf, -1 );
2701    // jcc(Assembler::parity, exit);
2702    emit_opcode( cbuf, 0x7A );
2703    emit_d8    ( cbuf, 0x13 );
2704    // movl(dst, less_result);
2705    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2706    emit_d32( cbuf, -1 );
2707    // jcc(Assembler::below, exit);
2708    emit_opcode( cbuf, 0x72 );
2709    emit_d8    ( cbuf, 0x0C );
2710    // movl(dst, equal_result);
2711    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2712    emit_d32( cbuf, 0 );
2713    // jcc(Assembler::equal, exit);
2714    emit_opcode( cbuf, 0x74 );
2715    emit_d8    ( cbuf, 0x05 );
2716    // movl(dst, greater_result);
2717    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2718    emit_d32( cbuf, 1 );
2719  %}
2720
2721
2722  // Compare the longs and set flags
2723  // BROKEN!  Do Not use as-is
2724  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2725    // CMP    $src1.hi,$src2.hi
2726    emit_opcode( cbuf, 0x3B );
2727    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2728    // JNE,s  done
2729    emit_opcode(cbuf,0x75);
2730    emit_d8(cbuf, 2 );
2731    // CMP    $src1.lo,$src2.lo
2732    emit_opcode( cbuf, 0x3B );
2733    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2734// done:
2735  %}
2736
2737  enc_class convert_int_long( regL dst, rRegI src ) %{
2738    // mov $dst.lo,$src
2739    int dst_encoding = $dst$$reg;
2740    int src_encoding = $src$$reg;
2741    encode_Copy( cbuf, dst_encoding  , src_encoding );
2742    // mov $dst.hi,$src
2743    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2744    // sar $dst.hi,31
2745    emit_opcode( cbuf, 0xC1 );
2746    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2747    emit_d8(cbuf, 0x1F );
2748  %}
2749
2750  enc_class convert_long_double( eRegL src ) %{
2751    // push $src.hi
2752    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2753    // push $src.lo
2754    emit_opcode(cbuf, 0x50+$src$$reg  );
2755    // fild 64-bits at [SP]
2756    emit_opcode(cbuf,0xdf);
2757    emit_d8(cbuf, 0x6C);
2758    emit_d8(cbuf, 0x24);
2759    emit_d8(cbuf, 0x00);
2760    // pop stack
2761    emit_opcode(cbuf, 0x83); // add  SP, #8
2762    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2763    emit_d8(cbuf, 0x8);
2764  %}
2765
2766  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2767    // IMUL   EDX:EAX,$src1
2768    emit_opcode( cbuf, 0xF7 );
2769    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2770    // SAR    EDX,$cnt-32
2771    int shift_count = ((int)$cnt$$constant) - 32;
2772    if (shift_count > 0) {
2773      emit_opcode(cbuf, 0xC1);
2774      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2775      emit_d8(cbuf, shift_count);
2776    }
2777  %}
2778
2779  // this version doesn't have add sp, 8
2780  enc_class convert_long_double2( eRegL src ) %{
2781    // push $src.hi
2782    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2783    // push $src.lo
2784    emit_opcode(cbuf, 0x50+$src$$reg  );
2785    // fild 64-bits at [SP]
2786    emit_opcode(cbuf,0xdf);
2787    emit_d8(cbuf, 0x6C);
2788    emit_d8(cbuf, 0x24);
2789    emit_d8(cbuf, 0x00);
2790  %}
2791
2792  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2793    // Basic idea: long = (long)int * (long)int
2794    // IMUL EDX:EAX, src
2795    emit_opcode( cbuf, 0xF7 );
2796    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2797  %}
2798
2799  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2800    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2801    // MUL EDX:EAX, src
2802    emit_opcode( cbuf, 0xF7 );
2803    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2804  %}
2805
2806  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2807    // Basic idea: lo(result) = lo(x_lo * y_lo)
2808    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2809    // MOV    $tmp,$src.lo
2810    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2811    // IMUL   $tmp,EDX
2812    emit_opcode( cbuf, 0x0F );
2813    emit_opcode( cbuf, 0xAF );
2814    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2815    // MOV    EDX,$src.hi
2816    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2817    // IMUL   EDX,EAX
2818    emit_opcode( cbuf, 0x0F );
2819    emit_opcode( cbuf, 0xAF );
2820    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2821    // ADD    $tmp,EDX
2822    emit_opcode( cbuf, 0x03 );
2823    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2824    // MUL   EDX:EAX,$src.lo
2825    emit_opcode( cbuf, 0xF7 );
2826    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2827    // ADD    EDX,ESI
2828    emit_opcode( cbuf, 0x03 );
2829    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2830  %}
2831
2832  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2833    // Basic idea: lo(result) = lo(src * y_lo)
2834    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2835    // IMUL   $tmp,EDX,$src
2836    emit_opcode( cbuf, 0x6B );
2837    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2838    emit_d8( cbuf, (int)$src$$constant );
2839    // MOV    EDX,$src
2840    emit_opcode(cbuf, 0xB8 + EDX_enc);
2841    emit_d32( cbuf, (int)$src$$constant );
2842    // MUL   EDX:EAX,EDX
2843    emit_opcode( cbuf, 0xF7 );
2844    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2845    // ADD    EDX,ESI
2846    emit_opcode( cbuf, 0x03 );
2847    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2848  %}
2849
2850  enc_class long_div( eRegL src1, eRegL src2 ) %{
2851    // PUSH src1.hi
2852    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2853    // PUSH src1.lo
2854    emit_opcode(cbuf,               0x50+$src1$$reg  );
2855    // PUSH src2.hi
2856    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2857    // PUSH src2.lo
2858    emit_opcode(cbuf,               0x50+$src2$$reg  );
2859    // CALL directly to the runtime
2860    cbuf.set_insts_mark();
2861    emit_opcode(cbuf,0xE8);       // Call into runtime
2862    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2863    // Restore stack
2864    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2865    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2866    emit_d8(cbuf, 4*4);
2867  %}
2868
2869  enc_class long_mod( eRegL src1, eRegL src2 ) %{
2870    // PUSH src1.hi
2871    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2872    // PUSH src1.lo
2873    emit_opcode(cbuf,               0x50+$src1$$reg  );
2874    // PUSH src2.hi
2875    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2876    // PUSH src2.lo
2877    emit_opcode(cbuf,               0x50+$src2$$reg  );
2878    // CALL directly to the runtime
2879    cbuf.set_insts_mark();
2880    emit_opcode(cbuf,0xE8);       // Call into runtime
2881    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2882    // Restore stack
2883    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2884    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2885    emit_d8(cbuf, 4*4);
2886  %}
2887
2888  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2889    // MOV   $tmp,$src.lo
2890    emit_opcode(cbuf, 0x8B);
2891    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2892    // OR    $tmp,$src.hi
2893    emit_opcode(cbuf, 0x0B);
2894    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2895  %}
2896
2897  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2898    // CMP    $src1.lo,$src2.lo
2899    emit_opcode( cbuf, 0x3B );
2900    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2901    // JNE,s  skip
2902    emit_cc(cbuf, 0x70, 0x5);
2903    emit_d8(cbuf,2);
2904    // CMP    $src1.hi,$src2.hi
2905    emit_opcode( cbuf, 0x3B );
2906    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2907  %}
2908
2909  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2910    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2911    emit_opcode( cbuf, 0x3B );
2912    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2913    // MOV    $tmp,$src1.hi
2914    emit_opcode( cbuf, 0x8B );
2915    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2916    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2917    emit_opcode( cbuf, 0x1B );
2918    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2919  %}
2920
2921  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2922    // XOR    $tmp,$tmp
2923    emit_opcode(cbuf,0x33);  // XOR
2924    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2925    // CMP    $tmp,$src.lo
2926    emit_opcode( cbuf, 0x3B );
2927    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2928    // SBB    $tmp,$src.hi
2929    emit_opcode( cbuf, 0x1B );
2930    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2931  %}
2932
2933 // Sniff, sniff... smells like Gnu Superoptimizer
2934  enc_class neg_long( eRegL dst ) %{
2935    emit_opcode(cbuf,0xF7);    // NEG hi
2936    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2937    emit_opcode(cbuf,0xF7);    // NEG lo
2938    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2939    emit_opcode(cbuf,0x83);    // SBB hi,0
2940    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2941    emit_d8    (cbuf,0 );
2942  %}
2943
2944  enc_class enc_pop_rdx() %{
2945    emit_opcode(cbuf,0x5A);
2946  %}
2947
2948  enc_class enc_rethrow() %{
2949    cbuf.set_insts_mark();
2950    emit_opcode(cbuf, 0xE9);        // jmp    entry
2951    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2952                   runtime_call_Relocation::spec(), RELOC_IMM32 );
2953  %}
2954
2955
2956  // Convert a double to an int.  Java semantics require we do complex
2957  // manglelations in the corner cases.  So we set the rounding mode to
2958  // 'zero', store the darned double down as an int, and reset the
2959  // rounding mode to 'nearest'.  The hardware throws an exception which
2960  // patches up the correct value directly to the stack.
2961  enc_class DPR2I_encoding( regDPR src ) %{
2962    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2963    // exceptions here, so that a NAN or other corner-case value will
2964    // thrown an exception (but normal values get converted at full speed).
2965    // However, I2C adapters and other float-stack manglers leave pending
2966    // invalid-op exceptions hanging.  We would have to clear them before
2967    // enabling them and that is more expensive than just testing for the
2968    // invalid value Intel stores down in the corner cases.
2969    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2970    emit_opcode(cbuf,0x2D);
2971    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2972    // Allocate a word
2973    emit_opcode(cbuf,0x83);            // SUB ESP,4
2974    emit_opcode(cbuf,0xEC);
2975    emit_d8(cbuf,0x04);
2976    // Encoding assumes a double has been pushed into FPR0.
2977    // Store down the double as an int, popping the FPU stack
2978    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2979    emit_opcode(cbuf,0x1C);
2980    emit_d8(cbuf,0x24);
2981    // Restore the rounding mode; mask the exception
2982    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2983    emit_opcode(cbuf,0x2D);
2984    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2985        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2986        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2987
2988    // Load the converted int; adjust CPU stack
2989    emit_opcode(cbuf,0x58);       // POP EAX
2990    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2991    emit_d32   (cbuf,0x80000000); //         0x80000000
2992    emit_opcode(cbuf,0x75);       // JNE around_slow_call
2993    emit_d8    (cbuf,0x07);       // Size of slow_call
2994    // Push src onto stack slow-path
2995    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2996    emit_d8    (cbuf,0xC0-1+$src$$reg );
2997    // CALL directly to the runtime
2998    cbuf.set_insts_mark();
2999    emit_opcode(cbuf,0xE8);       // Call into runtime
3000    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3001    // Carry on here...
3002  %}
3003
3004  enc_class DPR2L_encoding( regDPR src ) %{
3005    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3006    emit_opcode(cbuf,0x2D);
3007    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3008    // Allocate a word
3009    emit_opcode(cbuf,0x83);            // SUB ESP,8
3010    emit_opcode(cbuf,0xEC);
3011    emit_d8(cbuf,0x08);
3012    // Encoding assumes a double has been pushed into FPR0.
3013    // Store down the double as a long, popping the FPU stack
3014    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3015    emit_opcode(cbuf,0x3C);
3016    emit_d8(cbuf,0x24);
3017    // Restore the rounding mode; mask the exception
3018    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3019    emit_opcode(cbuf,0x2D);
3020    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3021        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3022        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3023
3024    // Load the converted int; adjust CPU stack
3025    emit_opcode(cbuf,0x58);       // POP EAX
3026    emit_opcode(cbuf,0x5A);       // POP EDX
3027    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3028    emit_d8    (cbuf,0xFA);       // rdx
3029    emit_d32   (cbuf,0x80000000); //         0x80000000
3030    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3031    emit_d8    (cbuf,0x07+4);     // Size of slow_call
3032    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3033    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3034    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3035    emit_d8    (cbuf,0x07);       // Size of slow_call
3036    // Push src onto stack slow-path
3037    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3038    emit_d8    (cbuf,0xC0-1+$src$$reg );
3039    // CALL directly to the runtime
3040    cbuf.set_insts_mark();
3041    emit_opcode(cbuf,0xE8);       // Call into runtime
3042    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3043    // Carry on here...
3044  %}
3045
3046  enc_class FMul_ST_reg( eRegFPR src1 ) %{
3047    // Operand was loaded from memory into fp ST (stack top)
3048    // FMUL   ST,$src  /* D8 C8+i */
3049    emit_opcode(cbuf, 0xD8);
3050    emit_opcode(cbuf, 0xC8 + $src1$$reg);
3051  %}
3052
3053  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3054    // FADDP  ST,src2  /* D8 C0+i */
3055    emit_opcode(cbuf, 0xD8);
3056    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3057    //could use FADDP  src2,fpST  /* DE C0+i */
3058  %}
3059
3060  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3061    // FADDP  src2,ST  /* DE C0+i */
3062    emit_opcode(cbuf, 0xDE);
3063    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3064  %}
3065
3066  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3067    // Operand has been loaded into fp ST (stack top)
3068      // FSUB   ST,$src1
3069      emit_opcode(cbuf, 0xD8);
3070      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3071
3072      // FDIV
3073      emit_opcode(cbuf, 0xD8);
3074      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3075  %}
3076
3077  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3078    // Operand was loaded from memory into fp ST (stack top)
3079    // FADD   ST,$src  /* D8 C0+i */
3080    emit_opcode(cbuf, 0xD8);
3081    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3082
3083    // FMUL  ST,src2  /* D8 C*+i */
3084    emit_opcode(cbuf, 0xD8);
3085    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3086  %}
3087
3088
3089  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3090    // Operand was loaded from memory into fp ST (stack top)
3091    // FADD   ST,$src  /* D8 C0+i */
3092    emit_opcode(cbuf, 0xD8);
3093    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3094
3095    // FMULP  src2,ST  /* DE C8+i */
3096    emit_opcode(cbuf, 0xDE);
3097    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3098  %}
3099
3100  // Atomically load the volatile long
3101  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3102    emit_opcode(cbuf,0xDF);
3103    int rm_byte_opcode = 0x05;
3104    int base     = $mem$$base;
3105    int index    = $mem$$index;
3106    int scale    = $mem$$scale;
3107    int displace = $mem$$disp;
3108    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3109    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3110    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3111  %}
3112
3113  // Volatile Store Long.  Must be atomic, so move it into
3114  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3115  // target address before the store (for null-ptr checks)
3116  // so the memory operand is used twice in the encoding.
3117  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3118    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3119    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3120    emit_opcode(cbuf,0xDF);
3121    int rm_byte_opcode = 0x07;
3122    int base     = $mem$$base;
3123    int index    = $mem$$index;
3124    int scale    = $mem$$scale;
3125    int displace = $mem$$disp;
3126    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3127    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3128  %}
3129
3130  // Safepoint Poll.  This polls the safepoint page, and causes an
3131  // exception if it is not readable. Unfortunately, it kills the condition code
3132  // in the process
3133  // We current use TESTL [spp],EDI
3134  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3135
3136  enc_class Safepoint_Poll() %{
3137    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3138    emit_opcode(cbuf,0x85);
3139    emit_rm (cbuf, 0x0, 0x7, 0x5);
3140    emit_d32(cbuf, (intptr_t)os::get_polling_page());
3141  %}
3142%}
3143
3144
3145//----------FRAME--------------------------------------------------------------
3146// Definition of frame structure and management information.
3147//
3148//  S T A C K   L A Y O U T    Allocators stack-slot number
3149//                             |   (to get allocators register number
3150//  G  Owned by    |        |  v    add OptoReg::stack0())
3151//  r   CALLER     |        |
3152//  o     |        +--------+      pad to even-align allocators stack-slot
3153//  w     V        |  pad0  |        numbers; owned by CALLER
3154//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3155//  h     ^        |   in   |  5
3156//        |        |  args  |  4   Holes in incoming args owned by SELF
3157//  |     |        |        |  3
3158//  |     |        +--------+
3159//  V     |        | old out|      Empty on Intel, window on Sparc
3160//        |    old |preserve|      Must be even aligned.
3161//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3162//        |        |   in   |  3   area for Intel ret address
3163//     Owned by    |preserve|      Empty on Sparc.
3164//       SELF      +--------+
3165//        |        |  pad2  |  2   pad to align old SP
3166//        |        +--------+  1
3167//        |        | locks  |  0
3168//        |        +--------+----> OptoReg::stack0(), even aligned
3169//        |        |  pad1  | 11   pad to align new SP
3170//        |        +--------+
3171//        |        |        | 10
3172//        |        | spills |  9   spills
3173//        V        |        |  8   (pad0 slot for callee)
3174//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3175//        ^        |  out   |  7
3176//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3177//     Owned by    +--------+
3178//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3179//        |    new |preserve|      Must be even-aligned.
3180//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3181//        |        |        |
3182//
3183// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3184//         known from SELF's arguments and the Java calling convention.
3185//         Region 6-7 is determined per call site.
3186// Note 2: If the calling convention leaves holes in the incoming argument
3187//         area, those holes are owned by SELF.  Holes in the outgoing area
3188//         are owned by the CALLEE.  Holes should not be nessecary in the
3189//         incoming area, as the Java calling convention is completely under
3190//         the control of the AD file.  Doubles can be sorted and packed to
3191//         avoid holes.  Holes in the outgoing arguments may be nessecary for
3192//         varargs C calling conventions.
3193// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3194//         even aligned with pad0 as needed.
3195//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3196//         region 6-11 is even aligned; it may be padded out more so that
3197//         the region from SP to FP meets the minimum stack alignment.
3198
3199frame %{
3200  // What direction does stack grow in (assumed to be same for C & Java)
3201  stack_direction(TOWARDS_LOW);
3202
3203  // These three registers define part of the calling convention
3204  // between compiled code and the interpreter.
3205  inline_cache_reg(EAX);                // Inline Cache Register
3206  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3207
3208  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3209  cisc_spilling_operand_name(indOffset32);
3210
3211  // Number of stack slots consumed by locking an object
3212  sync_stack_slots(1);
3213
3214  // Compiled code's Frame Pointer
3215  frame_pointer(ESP);
3216  // Interpreter stores its frame pointer in a register which is
3217  // stored to the stack by I2CAdaptors.
3218  // I2CAdaptors convert from interpreted java to compiled java.
3219  interpreter_frame_pointer(EBP);
3220
3221  // Stack alignment requirement
3222  // Alignment size in bytes (128-bit -> 16 bytes)
3223  stack_alignment(StackAlignmentInBytes);
3224
3225  // Number of stack slots between incoming argument block and the start of
3226  // a new frame.  The PROLOG must add this many slots to the stack.  The
3227  // EPILOG must remove this many slots.  Intel needs one slot for
3228  // return address and one for rbp, (must save rbp)
3229  in_preserve_stack_slots(2+VerifyStackAtCalls);
3230
3231  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3232  // for calls to C.  Supports the var-args backing area for register parms.
3233  varargs_C_out_slots_killed(0);
3234
3235  // The after-PROLOG location of the return address.  Location of
3236  // return address specifies a type (REG or STACK) and a number
3237  // representing the register number (i.e. - use a register name) or
3238  // stack slot.
3239  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3240  // Otherwise, it is above the locks and verification slot and alignment word
3241  return_addr(STACK - 1 +
3242              round_to((Compile::current()->in_preserve_stack_slots() +
3243                        Compile::current()->fixed_slots()),
3244                       stack_alignment_in_slots()));
3245
3246  // Body of function which returns an integer array locating
3247  // arguments either in registers or in stack slots.  Passed an array
3248  // of ideal registers called "sig" and a "length" count.  Stack-slot
3249  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3250  // arguments for a CALLEE.  Incoming stack arguments are
3251  // automatically biased by the preserve_stack_slots field above.
3252  calling_convention %{
3253    // No difference between ingoing/outgoing just pass false
3254    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3255  %}
3256
3257
3258  // Body of function which returns an integer array locating
3259  // arguments either in registers or in stack slots.  Passed an array
3260  // of ideal registers called "sig" and a "length" count.  Stack-slot
3261  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3262  // arguments for a CALLEE.  Incoming stack arguments are
3263  // automatically biased by the preserve_stack_slots field above.
3264  c_calling_convention %{
3265    // This is obviously always outgoing
3266    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3267  %}
3268
3269  // Location of C & interpreter return values
3270  c_return_value %{
3271    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3272    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3273    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3274
3275    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3276    // that C functions return float and double results in XMM0.
3277    if( ideal_reg == Op_RegD && UseSSE>=2 )
3278      return OptoRegPair(XMM0b_num,XMM0_num);
3279    if( ideal_reg == Op_RegF && UseSSE>=2 )
3280      return OptoRegPair(OptoReg::Bad,XMM0_num);
3281
3282    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3283  %}
3284
3285  // Location of return values
3286  return_value %{
3287    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3288    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3289    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3290    if( ideal_reg == Op_RegD && UseSSE>=2 )
3291      return OptoRegPair(XMM0b_num,XMM0_num);
3292    if( ideal_reg == Op_RegF && UseSSE>=1 )
3293      return OptoRegPair(OptoReg::Bad,XMM0_num);
3294    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3295  %}
3296
3297%}
3298
3299//----------ATTRIBUTES---------------------------------------------------------
3300//----------Operand Attributes-------------------------------------------------
3301op_attrib op_cost(0);        // Required cost attribute
3302
3303//----------Instruction Attributes---------------------------------------------
3304ins_attrib ins_cost(100);       // Required cost attribute
3305ins_attrib ins_size(8);         // Required size attribute (in bits)
3306ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3307                                // non-matching short branch variant of some
3308                                                            // long branch?
3309ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3310                                // specifies the alignment that some part of the instruction (not
3311                                // necessarily the start) requires.  If > 1, a compute_padding()
3312                                // function must be provided for the instruction
3313
3314//----------OPERANDS-----------------------------------------------------------
3315// Operand definitions must precede instruction definitions for correct parsing
3316// in the ADLC because operands constitute user defined types which are used in
3317// instruction definitions.
3318
3319//----------Simple Operands----------------------------------------------------
3320// Immediate Operands
3321// Integer Immediate
3322operand immI() %{
3323  match(ConI);
3324
3325  op_cost(10);
3326  format %{ %}
3327  interface(CONST_INTER);
3328%}
3329
3330// Constant for test vs zero
3331operand immI0() %{
3332  predicate(n->get_int() == 0);
3333  match(ConI);
3334
3335  op_cost(0);
3336  format %{ %}
3337  interface(CONST_INTER);
3338%}
3339
3340// Constant for increment
3341operand immI1() %{
3342  predicate(n->get_int() == 1);
3343  match(ConI);
3344
3345  op_cost(0);
3346  format %{ %}
3347  interface(CONST_INTER);
3348%}
3349
3350// Constant for decrement
3351operand immI_M1() %{
3352  predicate(n->get_int() == -1);
3353  match(ConI);
3354
3355  op_cost(0);
3356  format %{ %}
3357  interface(CONST_INTER);
3358%}
3359
3360// Valid scale values for addressing modes
3361operand immI2() %{
3362  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3363  match(ConI);
3364
3365  format %{ %}
3366  interface(CONST_INTER);
3367%}
3368
3369operand immI8() %{
3370  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3371  match(ConI);
3372
3373  op_cost(5);
3374  format %{ %}
3375  interface(CONST_INTER);
3376%}
3377
3378operand immI16() %{
3379  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3380  match(ConI);
3381
3382  op_cost(10);
3383  format %{ %}
3384  interface(CONST_INTER);
3385%}
3386
3387// Int Immediate non-negative
3388operand immU31()
3389%{
3390  predicate(n->get_int() >= 0);
3391  match(ConI);
3392
3393  op_cost(0);
3394  format %{ %}
3395  interface(CONST_INTER);
3396%}
3397
3398// Constant for long shifts
3399operand immI_32() %{
3400  predicate( n->get_int() == 32 );
3401  match(ConI);
3402
3403  op_cost(0);
3404  format %{ %}
3405  interface(CONST_INTER);
3406%}
3407
3408operand immI_1_31() %{
3409  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3410  match(ConI);
3411
3412  op_cost(0);
3413  format %{ %}
3414  interface(CONST_INTER);
3415%}
3416
3417operand immI_32_63() %{
3418  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3419  match(ConI);
3420  op_cost(0);
3421
3422  format %{ %}
3423  interface(CONST_INTER);
3424%}
3425
3426operand immI_1() %{
3427  predicate( n->get_int() == 1 );
3428  match(ConI);
3429
3430  op_cost(0);
3431  format %{ %}
3432  interface(CONST_INTER);
3433%}
3434
3435operand immI_2() %{
3436  predicate( n->get_int() == 2 );
3437  match(ConI);
3438
3439  op_cost(0);
3440  format %{ %}
3441  interface(CONST_INTER);
3442%}
3443
3444operand immI_3() %{
3445  predicate( n->get_int() == 3 );
3446  match(ConI);
3447
3448  op_cost(0);
3449  format %{ %}
3450  interface(CONST_INTER);
3451%}
3452
3453// Pointer Immediate
3454operand immP() %{
3455  match(ConP);
3456
3457  op_cost(10);
3458  format %{ %}
3459  interface(CONST_INTER);
3460%}
3461
3462// NULL Pointer Immediate
3463operand immP0() %{
3464  predicate( n->get_ptr() == 0 );
3465  match(ConP);
3466  op_cost(0);
3467
3468  format %{ %}
3469  interface(CONST_INTER);
3470%}
3471
3472// Long Immediate
3473operand immL() %{
3474  match(ConL);
3475
3476  op_cost(20);
3477  format %{ %}
3478  interface(CONST_INTER);
3479%}
3480
3481// Long Immediate zero
3482operand immL0() %{
3483  predicate( n->get_long() == 0L );
3484  match(ConL);
3485  op_cost(0);
3486
3487  format %{ %}
3488  interface(CONST_INTER);
3489%}
3490
3491// Long Immediate zero
3492operand immL_M1() %{
3493  predicate( n->get_long() == -1L );
3494  match(ConL);
3495  op_cost(0);
3496
3497  format %{ %}
3498  interface(CONST_INTER);
3499%}
3500
3501// Long immediate from 0 to 127.
3502// Used for a shorter form of long mul by 10.
3503operand immL_127() %{
3504  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3505  match(ConL);
3506  op_cost(0);
3507
3508  format %{ %}
3509  interface(CONST_INTER);
3510%}
3511
3512// Long Immediate: low 32-bit mask
3513operand immL_32bits() %{
3514  predicate(n->get_long() == 0xFFFFFFFFL);
3515  match(ConL);
3516  op_cost(0);
3517
3518  format %{ %}
3519  interface(CONST_INTER);
3520%}
3521
3522// Long Immediate: low 32-bit mask
3523operand immL32() %{
3524  predicate(n->get_long() == (int)(n->get_long()));
3525  match(ConL);
3526  op_cost(20);
3527
3528  format %{ %}
3529  interface(CONST_INTER);
3530%}
3531
3532//Double Immediate zero
3533operand immDPR0() %{
3534  // Do additional (and counter-intuitive) test against NaN to work around VC++
3535  // bug that generates code such that NaNs compare equal to 0.0
3536  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3537  match(ConD);
3538
3539  op_cost(5);
3540  format %{ %}
3541  interface(CONST_INTER);
3542%}
3543
3544// Double Immediate one
3545operand immDPR1() %{
3546  predicate( UseSSE<=1 && n->getd() == 1.0 );
3547  match(ConD);
3548
3549  op_cost(5);
3550  format %{ %}
3551  interface(CONST_INTER);
3552%}
3553
3554// Double Immediate
3555operand immDPR() %{
3556  predicate(UseSSE<=1);
3557  match(ConD);
3558
3559  op_cost(5);
3560  format %{ %}
3561  interface(CONST_INTER);
3562%}
3563
3564operand immD() %{
3565  predicate(UseSSE>=2);
3566  match(ConD);
3567
3568  op_cost(5);
3569  format %{ %}
3570  interface(CONST_INTER);
3571%}
3572
3573// Double Immediate zero
3574operand immD0() %{
3575  // Do additional (and counter-intuitive) test against NaN to work around VC++
3576  // bug that generates code such that NaNs compare equal to 0.0 AND do not
3577  // compare equal to -0.0.
3578  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3579  match(ConD);
3580
3581  format %{ %}
3582  interface(CONST_INTER);
3583%}
3584
3585// Float Immediate zero
3586operand immFPR0() %{
3587  predicate(UseSSE == 0 && n->getf() == 0.0F);
3588  match(ConF);
3589
3590  op_cost(5);
3591  format %{ %}
3592  interface(CONST_INTER);
3593%}
3594
3595// Float Immediate one
3596operand immFPR1() %{
3597  predicate(UseSSE == 0 && n->getf() == 1.0F);
3598  match(ConF);
3599
3600  op_cost(5);
3601  format %{ %}
3602  interface(CONST_INTER);
3603%}
3604
3605// Float Immediate
3606operand immFPR() %{
3607  predicate( UseSSE == 0 );
3608  match(ConF);
3609
3610  op_cost(5);
3611  format %{ %}
3612  interface(CONST_INTER);
3613%}
3614
3615// Float Immediate
3616operand immF() %{
3617  predicate(UseSSE >= 1);
3618  match(ConF);
3619
3620  op_cost(5);
3621  format %{ %}
3622  interface(CONST_INTER);
3623%}
3624
3625// Float Immediate zero.  Zero and not -0.0
3626operand immF0() %{
3627  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3628  match(ConF);
3629
3630  op_cost(5);
3631  format %{ %}
3632  interface(CONST_INTER);
3633%}
3634
3635// Immediates for special shifts (sign extend)
3636
3637// Constants for increment
3638operand immI_16() %{
3639  predicate( n->get_int() == 16 );
3640  match(ConI);
3641
3642  format %{ %}
3643  interface(CONST_INTER);
3644%}
3645
3646operand immI_24() %{
3647  predicate( n->get_int() == 24 );
3648  match(ConI);
3649
3650  format %{ %}
3651  interface(CONST_INTER);
3652%}
3653
3654// Constant for byte-wide masking
3655operand immI_255() %{
3656  predicate( n->get_int() == 255 );
3657  match(ConI);
3658
3659  format %{ %}
3660  interface(CONST_INTER);
3661%}
3662
3663// Constant for short-wide masking
3664operand immI_65535() %{
3665  predicate(n->get_int() == 65535);
3666  match(ConI);
3667
3668  format %{ %}
3669  interface(CONST_INTER);
3670%}
3671
3672// Register Operands
3673// Integer Register
3674operand rRegI() %{
3675  constraint(ALLOC_IN_RC(int_reg));
3676  match(RegI);
3677  match(xRegI);
3678  match(eAXRegI);
3679  match(eBXRegI);
3680  match(eCXRegI);
3681  match(eDXRegI);
3682  match(eDIRegI);
3683  match(eSIRegI);
3684
3685  format %{ %}
3686  interface(REG_INTER);
3687%}
3688
3689// Subset of Integer Register
3690operand xRegI(rRegI reg) %{
3691  constraint(ALLOC_IN_RC(int_x_reg));
3692  match(reg);
3693  match(eAXRegI);
3694  match(eBXRegI);
3695  match(eCXRegI);
3696  match(eDXRegI);
3697
3698  format %{ %}
3699  interface(REG_INTER);
3700%}
3701
3702// Special Registers
3703operand eAXRegI(xRegI reg) %{
3704  constraint(ALLOC_IN_RC(eax_reg));
3705  match(reg);
3706  match(rRegI);
3707
3708  format %{ "EAX" %}
3709  interface(REG_INTER);
3710%}
3711
3712// Special Registers
3713operand eBXRegI(xRegI reg) %{
3714  constraint(ALLOC_IN_RC(ebx_reg));
3715  match(reg);
3716  match(rRegI);
3717
3718  format %{ "EBX" %}
3719  interface(REG_INTER);
3720%}
3721
3722operand eCXRegI(xRegI reg) %{
3723  constraint(ALLOC_IN_RC(ecx_reg));
3724  match(reg);
3725  match(rRegI);
3726
3727  format %{ "ECX" %}
3728  interface(REG_INTER);
3729%}
3730
3731operand eDXRegI(xRegI reg) %{
3732  constraint(ALLOC_IN_RC(edx_reg));
3733  match(reg);
3734  match(rRegI);
3735
3736  format %{ "EDX" %}
3737  interface(REG_INTER);
3738%}
3739
3740operand eDIRegI(xRegI reg) %{
3741  constraint(ALLOC_IN_RC(edi_reg));
3742  match(reg);
3743  match(rRegI);
3744
3745  format %{ "EDI" %}
3746  interface(REG_INTER);
3747%}
3748
3749operand naxRegI() %{
3750  constraint(ALLOC_IN_RC(nax_reg));
3751  match(RegI);
3752  match(eCXRegI);
3753  match(eDXRegI);
3754  match(eSIRegI);
3755  match(eDIRegI);
3756
3757  format %{ %}
3758  interface(REG_INTER);
3759%}
3760
3761operand nadxRegI() %{
3762  constraint(ALLOC_IN_RC(nadx_reg));
3763  match(RegI);
3764  match(eBXRegI);
3765  match(eCXRegI);
3766  match(eSIRegI);
3767  match(eDIRegI);
3768
3769  format %{ %}
3770  interface(REG_INTER);
3771%}
3772
3773operand ncxRegI() %{
3774  constraint(ALLOC_IN_RC(ncx_reg));
3775  match(RegI);
3776  match(eAXRegI);
3777  match(eDXRegI);
3778  match(eSIRegI);
3779  match(eDIRegI);
3780
3781  format %{ %}
3782  interface(REG_INTER);
3783%}
3784
3785// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3786// //
3787operand eSIRegI(xRegI reg) %{
3788   constraint(ALLOC_IN_RC(esi_reg));
3789   match(reg);
3790   match(rRegI);
3791
3792   format %{ "ESI" %}
3793   interface(REG_INTER);
3794%}
3795
3796// Pointer Register
3797operand anyRegP() %{
3798  constraint(ALLOC_IN_RC(any_reg));
3799  match(RegP);
3800  match(eAXRegP);
3801  match(eBXRegP);
3802  match(eCXRegP);
3803  match(eDIRegP);
3804  match(eRegP);
3805
3806  format %{ %}
3807  interface(REG_INTER);
3808%}
3809
3810operand eRegP() %{
3811  constraint(ALLOC_IN_RC(int_reg));
3812  match(RegP);
3813  match(eAXRegP);
3814  match(eBXRegP);
3815  match(eCXRegP);
3816  match(eDIRegP);
3817
3818  format %{ %}
3819  interface(REG_INTER);
3820%}
3821
3822// On windows95, EBP is not safe to use for implicit null tests.
3823operand eRegP_no_EBP() %{
3824  constraint(ALLOC_IN_RC(int_reg_no_ebp));
3825  match(RegP);
3826  match(eAXRegP);
3827  match(eBXRegP);
3828  match(eCXRegP);
3829  match(eDIRegP);
3830
3831  op_cost(100);
3832  format %{ %}
3833  interface(REG_INTER);
3834%}
3835
3836operand naxRegP() %{
3837  constraint(ALLOC_IN_RC(nax_reg));
3838  match(RegP);
3839  match(eBXRegP);
3840  match(eDXRegP);
3841  match(eCXRegP);
3842  match(eSIRegP);
3843  match(eDIRegP);
3844
3845  format %{ %}
3846  interface(REG_INTER);
3847%}
3848
3849operand nabxRegP() %{
3850  constraint(ALLOC_IN_RC(nabx_reg));
3851  match(RegP);
3852  match(eCXRegP);
3853  match(eDXRegP);
3854  match(eSIRegP);
3855  match(eDIRegP);
3856
3857  format %{ %}
3858  interface(REG_INTER);
3859%}
3860
3861operand pRegP() %{
3862  constraint(ALLOC_IN_RC(p_reg));
3863  match(RegP);
3864  match(eBXRegP);
3865  match(eDXRegP);
3866  match(eSIRegP);
3867  match(eDIRegP);
3868
3869  format %{ %}
3870  interface(REG_INTER);
3871%}
3872
3873// Special Registers
3874// Return a pointer value
3875operand eAXRegP(eRegP reg) %{
3876  constraint(ALLOC_IN_RC(eax_reg));
3877  match(reg);
3878  format %{ "EAX" %}
3879  interface(REG_INTER);
3880%}
3881
3882// Used in AtomicAdd
3883operand eBXRegP(eRegP reg) %{
3884  constraint(ALLOC_IN_RC(ebx_reg));
3885  match(reg);
3886  format %{ "EBX" %}
3887  interface(REG_INTER);
3888%}
3889
3890// Tail-call (interprocedural jump) to interpreter
3891operand eCXRegP(eRegP reg) %{
3892  constraint(ALLOC_IN_RC(ecx_reg));
3893  match(reg);
3894  format %{ "ECX" %}
3895  interface(REG_INTER);
3896%}
3897
3898operand eSIRegP(eRegP reg) %{
3899  constraint(ALLOC_IN_RC(esi_reg));
3900  match(reg);
3901  format %{ "ESI" %}
3902  interface(REG_INTER);
3903%}
3904
3905// Used in rep stosw
3906operand eDIRegP(eRegP reg) %{
3907  constraint(ALLOC_IN_RC(edi_reg));
3908  match(reg);
3909  format %{ "EDI" %}
3910  interface(REG_INTER);
3911%}
3912
3913operand eRegL() %{
3914  constraint(ALLOC_IN_RC(long_reg));
3915  match(RegL);
3916  match(eADXRegL);
3917
3918  format %{ %}
3919  interface(REG_INTER);
3920%}
3921
3922operand eADXRegL( eRegL reg ) %{
3923  constraint(ALLOC_IN_RC(eadx_reg));
3924  match(reg);
3925
3926  format %{ "EDX:EAX" %}
3927  interface(REG_INTER);
3928%}
3929
3930operand eBCXRegL( eRegL reg ) %{
3931  constraint(ALLOC_IN_RC(ebcx_reg));
3932  match(reg);
3933
3934  format %{ "EBX:ECX" %}
3935  interface(REG_INTER);
3936%}
3937
3938// Special case for integer high multiply
3939operand eADXRegL_low_only() %{
3940  constraint(ALLOC_IN_RC(eadx_reg));
3941  match(RegL);
3942
3943  format %{ "EAX" %}
3944  interface(REG_INTER);
3945%}
3946
3947// Flags register, used as output of compare instructions
3948operand eFlagsReg() %{
3949  constraint(ALLOC_IN_RC(int_flags));
3950  match(RegFlags);
3951
3952  format %{ "EFLAGS" %}
3953  interface(REG_INTER);
3954%}
3955
3956// Flags register, used as output of FLOATING POINT compare instructions
3957operand eFlagsRegU() %{
3958  constraint(ALLOC_IN_RC(int_flags));
3959  match(RegFlags);
3960
3961  format %{ "EFLAGS_U" %}
3962  interface(REG_INTER);
3963%}
3964
3965operand eFlagsRegUCF() %{
3966  constraint(ALLOC_IN_RC(int_flags));
3967  match(RegFlags);
3968  predicate(false);
3969
3970  format %{ "EFLAGS_U_CF" %}
3971  interface(REG_INTER);
3972%}
3973
3974// Condition Code Register used by long compare
3975operand flagsReg_long_LTGE() %{
3976  constraint(ALLOC_IN_RC(int_flags));
3977  match(RegFlags);
3978  format %{ "FLAGS_LTGE" %}
3979  interface(REG_INTER);
3980%}
3981operand flagsReg_long_EQNE() %{
3982  constraint(ALLOC_IN_RC(int_flags));
3983  match(RegFlags);
3984  format %{ "FLAGS_EQNE" %}
3985  interface(REG_INTER);
3986%}
3987operand flagsReg_long_LEGT() %{
3988  constraint(ALLOC_IN_RC(int_flags));
3989  match(RegFlags);
3990  format %{ "FLAGS_LEGT" %}
3991  interface(REG_INTER);
3992%}
3993
3994// Float register operands
3995operand regDPR() %{
3996  predicate( UseSSE < 2 );
3997  constraint(ALLOC_IN_RC(fp_dbl_reg));
3998  match(RegD);
3999  match(regDPR1);
4000  match(regDPR2);
4001  format %{ %}
4002  interface(REG_INTER);
4003%}
4004
4005operand regDPR1(regDPR reg) %{
4006  predicate( UseSSE < 2 );
4007  constraint(ALLOC_IN_RC(fp_dbl_reg0));
4008  match(reg);
4009  format %{ "FPR1" %}
4010  interface(REG_INTER);
4011%}
4012
4013operand regDPR2(regDPR reg) %{
4014  predicate( UseSSE < 2 );
4015  constraint(ALLOC_IN_RC(fp_dbl_reg1));
4016  match(reg);
4017  format %{ "FPR2" %}
4018  interface(REG_INTER);
4019%}
4020
4021operand regnotDPR1(regDPR reg) %{
4022  predicate( UseSSE < 2 );
4023  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4024  match(reg);
4025  format %{ %}
4026  interface(REG_INTER);
4027%}
4028
4029// Float register operands
4030operand regFPR() %{
4031  predicate( UseSSE < 2 );
4032  constraint(ALLOC_IN_RC(fp_flt_reg));
4033  match(RegF);
4034  match(regFPR1);
4035  format %{ %}
4036  interface(REG_INTER);
4037%}
4038
4039// Float register operands
4040operand regFPR1(regFPR reg) %{
4041  predicate( UseSSE < 2 );
4042  constraint(ALLOC_IN_RC(fp_flt_reg0));
4043  match(reg);
4044  format %{ "FPR1" %}
4045  interface(REG_INTER);
4046%}
4047
4048// XMM Float register operands
4049operand regF() %{
4050  predicate( UseSSE>=1 );
4051  constraint(ALLOC_IN_RC(float_reg_legacy));
4052  match(RegF);
4053  format %{ %}
4054  interface(REG_INTER);
4055%}
4056
4057// XMM Double register operands
4058operand regD() %{
4059  predicate( UseSSE>=2 );
4060  constraint(ALLOC_IN_RC(double_reg_legacy));
4061  match(RegD);
4062  format %{ %}
4063  interface(REG_INTER);
4064%}
4065
4066// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4067// runtime code generation via reg_class_dynamic.
4068operand vecS() %{
4069  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4070  match(VecS);
4071
4072  format %{ %}
4073  interface(REG_INTER);
4074%}
4075
4076operand vecD() %{
4077  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4078  match(VecD);
4079
4080  format %{ %}
4081  interface(REG_INTER);
4082%}
4083
4084operand vecX() %{
4085  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4086  match(VecX);
4087
4088  format %{ %}
4089  interface(REG_INTER);
4090%}
4091
4092operand vecY() %{
4093  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4094  match(VecY);
4095
4096  format %{ %}
4097  interface(REG_INTER);
4098%}
4099
4100//----------Memory Operands----------------------------------------------------
4101// Direct Memory Operand
4102operand direct(immP addr) %{
4103  match(addr);
4104
4105  format %{ "[$addr]" %}
4106  interface(MEMORY_INTER) %{
4107    base(0xFFFFFFFF);
4108    index(0x4);
4109    scale(0x0);
4110    disp($addr);
4111  %}
4112%}
4113
4114// Indirect Memory Operand
4115operand indirect(eRegP reg) %{
4116  constraint(ALLOC_IN_RC(int_reg));
4117  match(reg);
4118
4119  format %{ "[$reg]" %}
4120  interface(MEMORY_INTER) %{
4121    base($reg);
4122    index(0x4);
4123    scale(0x0);
4124    disp(0x0);
4125  %}
4126%}
4127
4128// Indirect Memory Plus Short Offset Operand
4129operand indOffset8(eRegP reg, immI8 off) %{
4130  match(AddP reg off);
4131
4132  format %{ "[$reg + $off]" %}
4133  interface(MEMORY_INTER) %{
4134    base($reg);
4135    index(0x4);
4136    scale(0x0);
4137    disp($off);
4138  %}
4139%}
4140
4141// Indirect Memory Plus Long Offset Operand
4142operand indOffset32(eRegP reg, immI off) %{
4143  match(AddP reg off);
4144
4145  format %{ "[$reg + $off]" %}
4146  interface(MEMORY_INTER) %{
4147    base($reg);
4148    index(0x4);
4149    scale(0x0);
4150    disp($off);
4151  %}
4152%}
4153
4154// Indirect Memory Plus Long Offset Operand
4155operand indOffset32X(rRegI reg, immP off) %{
4156  match(AddP off reg);
4157
4158  format %{ "[$reg + $off]" %}
4159  interface(MEMORY_INTER) %{
4160    base($reg);
4161    index(0x4);
4162    scale(0x0);
4163    disp($off);
4164  %}
4165%}
4166
4167// Indirect Memory Plus Index Register Plus Offset Operand
4168operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4169  match(AddP (AddP reg ireg) off);
4170
4171  op_cost(10);
4172  format %{"[$reg + $off + $ireg]" %}
4173  interface(MEMORY_INTER) %{
4174    base($reg);
4175    index($ireg);
4176    scale(0x0);
4177    disp($off);
4178  %}
4179%}
4180
4181// Indirect Memory Plus Index Register Plus Offset Operand
4182operand indIndex(eRegP reg, rRegI ireg) %{
4183  match(AddP reg ireg);
4184
4185  op_cost(10);
4186  format %{"[$reg + $ireg]" %}
4187  interface(MEMORY_INTER) %{
4188    base($reg);
4189    index($ireg);
4190    scale(0x0);
4191    disp(0x0);
4192  %}
4193%}
4194
4195// // -------------------------------------------------------------------------
4196// // 486 architecture doesn't support "scale * index + offset" with out a base
4197// // -------------------------------------------------------------------------
4198// // Scaled Memory Operands
4199// // Indirect Memory Times Scale Plus Offset Operand
4200// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4201//   match(AddP off (LShiftI ireg scale));
4202//
4203//   op_cost(10);
4204//   format %{"[$off + $ireg << $scale]" %}
4205//   interface(MEMORY_INTER) %{
4206//     base(0x4);
4207//     index($ireg);
4208//     scale($scale);
4209//     disp($off);
4210//   %}
4211// %}
4212
4213// Indirect Memory Times Scale Plus Index Register
4214operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4215  match(AddP reg (LShiftI ireg scale));
4216
4217  op_cost(10);
4218  format %{"[$reg + $ireg << $scale]" %}
4219  interface(MEMORY_INTER) %{
4220    base($reg);
4221    index($ireg);
4222    scale($scale);
4223    disp(0x0);
4224  %}
4225%}
4226
4227// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4228operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4229  match(AddP (AddP reg (LShiftI ireg scale)) off);
4230
4231  op_cost(10);
4232  format %{"[$reg + $off + $ireg << $scale]" %}
4233  interface(MEMORY_INTER) %{
4234    base($reg);
4235    index($ireg);
4236    scale($scale);
4237    disp($off);
4238  %}
4239%}
4240
4241//----------Load Long Memory Operands------------------------------------------
4242// The load-long idiom will use it's address expression again after loading
4243// the first word of the long.  If the load-long destination overlaps with
4244// registers used in the addressing expression, the 2nd half will be loaded
4245// from a clobbered address.  Fix this by requiring that load-long use
4246// address registers that do not overlap with the load-long target.
4247
4248// load-long support
4249operand load_long_RegP() %{
4250  constraint(ALLOC_IN_RC(esi_reg));
4251  match(RegP);
4252  match(eSIRegP);
4253  op_cost(100);
4254  format %{  %}
4255  interface(REG_INTER);
4256%}
4257
4258// Indirect Memory Operand Long
4259operand load_long_indirect(load_long_RegP reg) %{
4260  constraint(ALLOC_IN_RC(esi_reg));
4261  match(reg);
4262
4263  format %{ "[$reg]" %}
4264  interface(MEMORY_INTER) %{
4265    base($reg);
4266    index(0x4);
4267    scale(0x0);
4268    disp(0x0);
4269  %}
4270%}
4271
4272// Indirect Memory Plus Long Offset Operand
4273operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4274  match(AddP reg off);
4275
4276  format %{ "[$reg + $off]" %}
4277  interface(MEMORY_INTER) %{
4278    base($reg);
4279    index(0x4);
4280    scale(0x0);
4281    disp($off);
4282  %}
4283%}
4284
4285opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4286
4287
4288//----------Special Memory Operands--------------------------------------------
4289// Stack Slot Operand - This operand is used for loading and storing temporary
4290//                      values on the stack where a match requires a value to
4291//                      flow through memory.
4292operand stackSlotP(sRegP reg) %{
4293  constraint(ALLOC_IN_RC(stack_slots));
4294  // No match rule because this operand is only generated in matching
4295  format %{ "[$reg]" %}
4296  interface(MEMORY_INTER) %{
4297    base(0x4);   // ESP
4298    index(0x4);  // No Index
4299    scale(0x0);  // No Scale
4300    disp($reg);  // Stack Offset
4301  %}
4302%}
4303
4304operand stackSlotI(sRegI reg) %{
4305  constraint(ALLOC_IN_RC(stack_slots));
4306  // No match rule because this operand is only generated in matching
4307  format %{ "[$reg]" %}
4308  interface(MEMORY_INTER) %{
4309    base(0x4);   // ESP
4310    index(0x4);  // No Index
4311    scale(0x0);  // No Scale
4312    disp($reg);  // Stack Offset
4313  %}
4314%}
4315
4316operand stackSlotF(sRegF reg) %{
4317  constraint(ALLOC_IN_RC(stack_slots));
4318  // No match rule because this operand is only generated in matching
4319  format %{ "[$reg]" %}
4320  interface(MEMORY_INTER) %{
4321    base(0x4);   // ESP
4322    index(0x4);  // No Index
4323    scale(0x0);  // No Scale
4324    disp($reg);  // Stack Offset
4325  %}
4326%}
4327
4328operand stackSlotD(sRegD reg) %{
4329  constraint(ALLOC_IN_RC(stack_slots));
4330  // No match rule because this operand is only generated in matching
4331  format %{ "[$reg]" %}
4332  interface(MEMORY_INTER) %{
4333    base(0x4);   // ESP
4334    index(0x4);  // No Index
4335    scale(0x0);  // No Scale
4336    disp($reg);  // Stack Offset
4337  %}
4338%}
4339
4340operand stackSlotL(sRegL reg) %{
4341  constraint(ALLOC_IN_RC(stack_slots));
4342  // No match rule because this operand is only generated in matching
4343  format %{ "[$reg]" %}
4344  interface(MEMORY_INTER) %{
4345    base(0x4);   // ESP
4346    index(0x4);  // No Index
4347    scale(0x0);  // No Scale
4348    disp($reg);  // Stack Offset
4349  %}
4350%}
4351
4352//----------Memory Operands - Win95 Implicit Null Variants----------------
4353// Indirect Memory Operand
4354operand indirect_win95_safe(eRegP_no_EBP reg)
4355%{
4356  constraint(ALLOC_IN_RC(int_reg));
4357  match(reg);
4358
4359  op_cost(100);
4360  format %{ "[$reg]" %}
4361  interface(MEMORY_INTER) %{
4362    base($reg);
4363    index(0x4);
4364    scale(0x0);
4365    disp(0x0);
4366  %}
4367%}
4368
4369// Indirect Memory Plus Short Offset Operand
4370operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4371%{
4372  match(AddP reg off);
4373
4374  op_cost(100);
4375  format %{ "[$reg + $off]" %}
4376  interface(MEMORY_INTER) %{
4377    base($reg);
4378    index(0x4);
4379    scale(0x0);
4380    disp($off);
4381  %}
4382%}
4383
4384// Indirect Memory Plus Long Offset Operand
4385operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4386%{
4387  match(AddP reg off);
4388
4389  op_cost(100);
4390  format %{ "[$reg + $off]" %}
4391  interface(MEMORY_INTER) %{
4392    base($reg);
4393    index(0x4);
4394    scale(0x0);
4395    disp($off);
4396  %}
4397%}
4398
4399// Indirect Memory Plus Index Register Plus Offset Operand
4400operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4401%{
4402  match(AddP (AddP reg ireg) off);
4403
4404  op_cost(100);
4405  format %{"[$reg + $off + $ireg]" %}
4406  interface(MEMORY_INTER) %{
4407    base($reg);
4408    index($ireg);
4409    scale(0x0);
4410    disp($off);
4411  %}
4412%}
4413
4414// Indirect Memory Times Scale Plus Index Register
4415operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4416%{
4417  match(AddP reg (LShiftI ireg scale));
4418
4419  op_cost(100);
4420  format %{"[$reg + $ireg << $scale]" %}
4421  interface(MEMORY_INTER) %{
4422    base($reg);
4423    index($ireg);
4424    scale($scale);
4425    disp(0x0);
4426  %}
4427%}
4428
4429// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4430operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4431%{
4432  match(AddP (AddP reg (LShiftI ireg scale)) off);
4433
4434  op_cost(100);
4435  format %{"[$reg + $off + $ireg << $scale]" %}
4436  interface(MEMORY_INTER) %{
4437    base($reg);
4438    index($ireg);
4439    scale($scale);
4440    disp($off);
4441  %}
4442%}
4443
4444//----------Conditional Branch Operands----------------------------------------
4445// Comparison Op  - This is the operation of the comparison, and is limited to
4446//                  the following set of codes:
4447//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4448//
4449// Other attributes of the comparison, such as unsignedness, are specified
4450// by the comparison instruction that sets a condition code flags register.
4451// That result is represented by a flags operand whose subtype is appropriate
4452// to the unsignedness (etc.) of the comparison.
4453//
4454// Later, the instruction which matches both the Comparison Op (a Bool) and
4455// the flags (produced by the Cmp) specifies the coding of the comparison op
4456// by matching a specific subtype of Bool operand below, such as cmpOpU.
4457
4458// Comparision Code
4459operand cmpOp() %{
4460  match(Bool);
4461
4462  format %{ "" %}
4463  interface(COND_INTER) %{
4464    equal(0x4, "e");
4465    not_equal(0x5, "ne");
4466    less(0xC, "l");
4467    greater_equal(0xD, "ge");
4468    less_equal(0xE, "le");
4469    greater(0xF, "g");
4470    overflow(0x0, "o");
4471    no_overflow(0x1, "no");
4472  %}
4473%}
4474
4475// Comparison Code, unsigned compare.  Used by FP also, with
4476// C2 (unordered) turned into GT or LT already.  The other bits
4477// C0 and C3 are turned into Carry & Zero flags.
4478operand cmpOpU() %{
4479  match(Bool);
4480
4481  format %{ "" %}
4482  interface(COND_INTER) %{
4483    equal(0x4, "e");
4484    not_equal(0x5, "ne");
4485    less(0x2, "b");
4486    greater_equal(0x3, "nb");
4487    less_equal(0x6, "be");
4488    greater(0x7, "nbe");
4489    overflow(0x0, "o");
4490    no_overflow(0x1, "no");
4491  %}
4492%}
4493
4494// Floating comparisons that don't require any fixup for the unordered case
4495operand cmpOpUCF() %{
4496  match(Bool);
4497  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4498            n->as_Bool()->_test._test == BoolTest::ge ||
4499            n->as_Bool()->_test._test == BoolTest::le ||
4500            n->as_Bool()->_test._test == BoolTest::gt);
4501  format %{ "" %}
4502  interface(COND_INTER) %{
4503    equal(0x4, "e");
4504    not_equal(0x5, "ne");
4505    less(0x2, "b");
4506    greater_equal(0x3, "nb");
4507    less_equal(0x6, "be");
4508    greater(0x7, "nbe");
4509    overflow(0x0, "o");
4510    no_overflow(0x1, "no");
4511  %}
4512%}
4513
4514
4515// Floating comparisons that can be fixed up with extra conditional jumps
4516operand cmpOpUCF2() %{
4517  match(Bool);
4518  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4519            n->as_Bool()->_test._test == BoolTest::eq);
4520  format %{ "" %}
4521  interface(COND_INTER) %{
4522    equal(0x4, "e");
4523    not_equal(0x5, "ne");
4524    less(0x2, "b");
4525    greater_equal(0x3, "nb");
4526    less_equal(0x6, "be");
4527    greater(0x7, "nbe");
4528    overflow(0x0, "o");
4529    no_overflow(0x1, "no");
4530  %}
4531%}
4532
4533// Comparison Code for FP conditional move
4534operand cmpOp_fcmov() %{
4535  match(Bool);
4536
4537  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4538            n->as_Bool()->_test._test != BoolTest::no_overflow);
4539  format %{ "" %}
4540  interface(COND_INTER) %{
4541    equal        (0x0C8);
4542    not_equal    (0x1C8);
4543    less         (0x0C0);
4544    greater_equal(0x1C0);
4545    less_equal   (0x0D0);
4546    greater      (0x1D0);
4547    overflow(0x0, "o"); // not really supported by the instruction
4548    no_overflow(0x1, "no"); // not really supported by the instruction
4549  %}
4550%}
4551
4552// Comparision Code used in long compares
4553operand cmpOp_commute() %{
4554  match(Bool);
4555
4556  format %{ "" %}
4557  interface(COND_INTER) %{
4558    equal(0x4, "e");
4559    not_equal(0x5, "ne");
4560    less(0xF, "g");
4561    greater_equal(0xE, "le");
4562    less_equal(0xD, "ge");
4563    greater(0xC, "l");
4564    overflow(0x0, "o");
4565    no_overflow(0x1, "no");
4566  %}
4567%}
4568
4569//----------OPERAND CLASSES----------------------------------------------------
4570// Operand Classes are groups of operands that are used as to simplify
4571// instruction definitions by not requiring the AD writer to specify separate
4572// instructions for every form of operand when the instruction accepts
4573// multiple operand types with the same basic encoding and format.  The classic
4574// case of this is memory operands.
4575
4576opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4577               indIndex, indIndexScale, indIndexScaleOffset);
4578
4579// Long memory operations are encoded in 2 instructions and a +4 offset.
4580// This means some kind of offset is always required and you cannot use
4581// an oop as the offset (done when working on static globals).
4582opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4583                    indIndex, indIndexScale, indIndexScaleOffset);
4584
4585
4586//----------PIPELINE-----------------------------------------------------------
4587// Rules which define the behavior of the target architectures pipeline.
4588pipeline %{
4589
4590//----------ATTRIBUTES---------------------------------------------------------
4591attributes %{
4592  variable_size_instructions;        // Fixed size instructions
4593  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4594  instruction_unit_size = 1;         // An instruction is 1 bytes long
4595  instruction_fetch_unit_size = 16;  // The processor fetches one line
4596  instruction_fetch_units = 1;       // of 16 bytes
4597
4598  // List of nop instructions
4599  nops( MachNop );
4600%}
4601
4602//----------RESOURCES----------------------------------------------------------
4603// Resources are the functional units available to the machine
4604
4605// Generic P2/P3 pipeline
4606// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4607// 3 instructions decoded per cycle.
4608// 2 load/store ops per cycle, 1 branch, 1 FPU,
4609// 2 ALU op, only ALU0 handles mul/div instructions.
4610resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4611           MS0, MS1, MEM = MS0 | MS1,
4612           BR, FPU,
4613           ALU0, ALU1, ALU = ALU0 | ALU1 );
4614
4615//----------PIPELINE DESCRIPTION-----------------------------------------------
4616// Pipeline Description specifies the stages in the machine's pipeline
4617
4618// Generic P2/P3 pipeline
4619pipe_desc(S0, S1, S2, S3, S4, S5);
4620
4621//----------PIPELINE CLASSES---------------------------------------------------
4622// Pipeline Classes describe the stages in which input and output are
4623// referenced by the hardware pipeline.
4624
4625// Naming convention: ialu or fpu
4626// Then: _reg
4627// Then: _reg if there is a 2nd register
4628// Then: _long if it's a pair of instructions implementing a long
4629// Then: _fat if it requires the big decoder
4630//   Or: _mem if it requires the big decoder and a memory unit.
4631
4632// Integer ALU reg operation
4633pipe_class ialu_reg(rRegI dst) %{
4634    single_instruction;
4635    dst    : S4(write);
4636    dst    : S3(read);
4637    DECODE : S0;        // any decoder
4638    ALU    : S3;        // any alu
4639%}
4640
4641// Long ALU reg operation
4642pipe_class ialu_reg_long(eRegL dst) %{
4643    instruction_count(2);
4644    dst    : S4(write);
4645    dst    : S3(read);
4646    DECODE : S0(2);     // any 2 decoders
4647    ALU    : S3(2);     // both alus
4648%}
4649
4650// Integer ALU reg operation using big decoder
4651pipe_class ialu_reg_fat(rRegI dst) %{
4652    single_instruction;
4653    dst    : S4(write);
4654    dst    : S3(read);
4655    D0     : S0;        // big decoder only
4656    ALU    : S3;        // any alu
4657%}
4658
4659// Long ALU reg operation using big decoder
4660pipe_class ialu_reg_long_fat(eRegL dst) %{
4661    instruction_count(2);
4662    dst    : S4(write);
4663    dst    : S3(read);
4664    D0     : S0(2);     // big decoder only; twice
4665    ALU    : S3(2);     // any 2 alus
4666%}
4667
4668// Integer ALU reg-reg operation
4669pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4670    single_instruction;
4671    dst    : S4(write);
4672    src    : S3(read);
4673    DECODE : S0;        // any decoder
4674    ALU    : S3;        // any alu
4675%}
4676
4677// Long ALU reg-reg operation
4678pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4679    instruction_count(2);
4680    dst    : S4(write);
4681    src    : S3(read);
4682    DECODE : S0(2);     // any 2 decoders
4683    ALU    : S3(2);     // both alus
4684%}
4685
4686// Integer ALU reg-reg operation
4687pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4688    single_instruction;
4689    dst    : S4(write);
4690    src    : S3(read);
4691    D0     : S0;        // big decoder only
4692    ALU    : S3;        // any alu
4693%}
4694
4695// Long ALU reg-reg operation
4696pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4697    instruction_count(2);
4698    dst    : S4(write);
4699    src    : S3(read);
4700    D0     : S0(2);     // big decoder only; twice
4701    ALU    : S3(2);     // both alus
4702%}
4703
4704// Integer ALU reg-mem operation
4705pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4706    single_instruction;
4707    dst    : S5(write);
4708    mem    : S3(read);
4709    D0     : S0;        // big decoder only
4710    ALU    : S4;        // any alu
4711    MEM    : S3;        // any mem
4712%}
4713
4714// Long ALU reg-mem operation
4715pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4716    instruction_count(2);
4717    dst    : S5(write);
4718    mem    : S3(read);
4719    D0     : S0(2);     // big decoder only; twice
4720    ALU    : S4(2);     // any 2 alus
4721    MEM    : S3(2);     // both mems
4722%}
4723
4724// Integer mem operation (prefetch)
4725pipe_class ialu_mem(memory mem)
4726%{
4727    single_instruction;
4728    mem    : S3(read);
4729    D0     : S0;        // big decoder only
4730    MEM    : S3;        // any mem
4731%}
4732
4733// Integer Store to Memory
4734pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4735    single_instruction;
4736    mem    : S3(read);
4737    src    : S5(read);
4738    D0     : S0;        // big decoder only
4739    ALU    : S4;        // any alu
4740    MEM    : S3;
4741%}
4742
4743// Long Store to Memory
4744pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4745    instruction_count(2);
4746    mem    : S3(read);
4747    src    : S5(read);
4748    D0     : S0(2);     // big decoder only; twice
4749    ALU    : S4(2);     // any 2 alus
4750    MEM    : S3(2);     // Both mems
4751%}
4752
4753// Integer Store to Memory
4754pipe_class ialu_mem_imm(memory mem) %{
4755    single_instruction;
4756    mem    : S3(read);
4757    D0     : S0;        // big decoder only
4758    ALU    : S4;        // any alu
4759    MEM    : S3;
4760%}
4761
4762// Integer ALU0 reg-reg operation
4763pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4764    single_instruction;
4765    dst    : S4(write);
4766    src    : S3(read);
4767    D0     : S0;        // Big decoder only
4768    ALU0   : S3;        // only alu0
4769%}
4770
4771// Integer ALU0 reg-mem operation
4772pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4773    single_instruction;
4774    dst    : S5(write);
4775    mem    : S3(read);
4776    D0     : S0;        // big decoder only
4777    ALU0   : S4;        // ALU0 only
4778    MEM    : S3;        // any mem
4779%}
4780
4781// Integer ALU reg-reg operation
4782pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4783    single_instruction;
4784    cr     : S4(write);
4785    src1   : S3(read);
4786    src2   : S3(read);
4787    DECODE : S0;        // any decoder
4788    ALU    : S3;        // any alu
4789%}
4790
4791// Integer ALU reg-imm operation
4792pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4793    single_instruction;
4794    cr     : S4(write);
4795    src1   : S3(read);
4796    DECODE : S0;        // any decoder
4797    ALU    : S3;        // any alu
4798%}
4799
4800// Integer ALU reg-mem operation
4801pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4802    single_instruction;
4803    cr     : S4(write);
4804    src1   : S3(read);
4805    src2   : S3(read);
4806    D0     : S0;        // big decoder only
4807    ALU    : S4;        // any alu
4808    MEM    : S3;
4809%}
4810
4811// Conditional move reg-reg
4812pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4813    instruction_count(4);
4814    y      : S4(read);
4815    q      : S3(read);
4816    p      : S3(read);
4817    DECODE : S0(4);     // any decoder
4818%}
4819
4820// Conditional move reg-reg
4821pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4822    single_instruction;
4823    dst    : S4(write);
4824    src    : S3(read);
4825    cr     : S3(read);
4826    DECODE : S0;        // any decoder
4827%}
4828
4829// Conditional move reg-mem
4830pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4831    single_instruction;
4832    dst    : S4(write);
4833    src    : S3(read);
4834    cr     : S3(read);
4835    DECODE : S0;        // any decoder
4836    MEM    : S3;
4837%}
4838
4839// Conditional move reg-reg long
4840pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4841    single_instruction;
4842    dst    : S4(write);
4843    src    : S3(read);
4844    cr     : S3(read);
4845    DECODE : S0(2);     // any 2 decoders
4846%}
4847
4848// Conditional move double reg-reg
4849pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4850    single_instruction;
4851    dst    : S4(write);
4852    src    : S3(read);
4853    cr     : S3(read);
4854    DECODE : S0;        // any decoder
4855%}
4856
4857// Float reg-reg operation
4858pipe_class fpu_reg(regDPR dst) %{
4859    instruction_count(2);
4860    dst    : S3(read);
4861    DECODE : S0(2);     // any 2 decoders
4862    FPU    : S3;
4863%}
4864
4865// Float reg-reg operation
4866pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4867    instruction_count(2);
4868    dst    : S4(write);
4869    src    : S3(read);
4870    DECODE : S0(2);     // any 2 decoders
4871    FPU    : S3;
4872%}
4873
4874// Float reg-reg operation
4875pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4876    instruction_count(3);
4877    dst    : S4(write);
4878    src1   : S3(read);
4879    src2   : S3(read);
4880    DECODE : S0(3);     // any 3 decoders
4881    FPU    : S3(2);
4882%}
4883
4884// Float reg-reg operation
4885pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4886    instruction_count(4);
4887    dst    : S4(write);
4888    src1   : S3(read);
4889    src2   : S3(read);
4890    src3   : S3(read);
4891    DECODE : S0(4);     // any 3 decoders
4892    FPU    : S3(2);
4893%}
4894
4895// Float reg-reg operation
4896pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4897    instruction_count(4);
4898    dst    : S4(write);
4899    src1   : S3(read);
4900    src2   : S3(read);
4901    src3   : S3(read);
4902    DECODE : S1(3);     // any 3 decoders
4903    D0     : S0;        // Big decoder only
4904    FPU    : S3(2);
4905    MEM    : S3;
4906%}
4907
4908// Float reg-mem operation
4909pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4910    instruction_count(2);
4911    dst    : S5(write);
4912    mem    : S3(read);
4913    D0     : S0;        // big decoder only
4914    DECODE : S1;        // any decoder for FPU POP
4915    FPU    : S4;
4916    MEM    : S3;        // any mem
4917%}
4918
4919// Float reg-mem operation
4920pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4921    instruction_count(3);
4922    dst    : S5(write);
4923    src1   : S3(read);
4924    mem    : S3(read);
4925    D0     : S0;        // big decoder only
4926    DECODE : S1(2);     // any decoder for FPU POP
4927    FPU    : S4;
4928    MEM    : S3;        // any mem
4929%}
4930
4931// Float mem-reg operation
4932pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4933    instruction_count(2);
4934    src    : S5(read);
4935    mem    : S3(read);
4936    DECODE : S0;        // any decoder for FPU PUSH
4937    D0     : S1;        // big decoder only
4938    FPU    : S4;
4939    MEM    : S3;        // any mem
4940%}
4941
4942pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4943    instruction_count(3);
4944    src1   : S3(read);
4945    src2   : S3(read);
4946    mem    : S3(read);
4947    DECODE : S0(2);     // any decoder for FPU PUSH
4948    D0     : S1;        // big decoder only
4949    FPU    : S4;
4950    MEM    : S3;        // any mem
4951%}
4952
4953pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4954    instruction_count(3);
4955    src1   : S3(read);
4956    src2   : S3(read);
4957    mem    : S4(read);
4958    DECODE : S0;        // any decoder for FPU PUSH
4959    D0     : S0(2);     // big decoder only
4960    FPU    : S4;
4961    MEM    : S3(2);     // any mem
4962%}
4963
4964pipe_class fpu_mem_mem(memory dst, memory src1) %{
4965    instruction_count(2);
4966    src1   : S3(read);
4967    dst    : S4(read);
4968    D0     : S0(2);     // big decoder only
4969    MEM    : S3(2);     // any mem
4970%}
4971
4972pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4973    instruction_count(3);
4974    src1   : S3(read);
4975    src2   : S3(read);
4976    dst    : S4(read);
4977    D0     : S0(3);     // big decoder only
4978    FPU    : S4;
4979    MEM    : S3(3);     // any mem
4980%}
4981
4982pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4983    instruction_count(3);
4984    src1   : S4(read);
4985    mem    : S4(read);
4986    DECODE : S0;        // any decoder for FPU PUSH
4987    D0     : S0(2);     // big decoder only
4988    FPU    : S4;
4989    MEM    : S3(2);     // any mem
4990%}
4991
4992// Float load constant
4993pipe_class fpu_reg_con(regDPR dst) %{
4994    instruction_count(2);
4995    dst    : S5(write);
4996    D0     : S0;        // big decoder only for the load
4997    DECODE : S1;        // any decoder for FPU POP
4998    FPU    : S4;
4999    MEM    : S3;        // any mem
5000%}
5001
5002// Float load constant
5003pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5004    instruction_count(3);
5005    dst    : S5(write);
5006    src    : S3(read);
5007    D0     : S0;        // big decoder only for the load
5008    DECODE : S1(2);     // any decoder for FPU POP
5009    FPU    : S4;
5010    MEM    : S3;        // any mem
5011%}
5012
5013// UnConditional branch
5014pipe_class pipe_jmp( label labl ) %{
5015    single_instruction;
5016    BR   : S3;
5017%}
5018
5019// Conditional branch
5020pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5021    single_instruction;
5022    cr    : S1(read);
5023    BR    : S3;
5024%}
5025
5026// Allocation idiom
5027pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5028    instruction_count(1); force_serialization;
5029    fixed_latency(6);
5030    heap_ptr : S3(read);
5031    DECODE   : S0(3);
5032    D0       : S2;
5033    MEM      : S3;
5034    ALU      : S3(2);
5035    dst      : S5(write);
5036    BR       : S5;
5037%}
5038
5039// Generic big/slow expanded idiom
5040pipe_class pipe_slow(  ) %{
5041    instruction_count(10); multiple_bundles; force_serialization;
5042    fixed_latency(100);
5043    D0  : S0(2);
5044    MEM : S3(2);
5045%}
5046
5047// The real do-nothing guy
5048pipe_class empty( ) %{
5049    instruction_count(0);
5050%}
5051
5052// Define the class for the Nop node
5053define %{
5054   MachNop = empty;
5055%}
5056
5057%}
5058
5059//----------INSTRUCTIONS-------------------------------------------------------
5060//
5061// match      -- States which machine-independent subtree may be replaced
5062//               by this instruction.
5063// ins_cost   -- The estimated cost of this instruction is used by instruction
5064//               selection to identify a minimum cost tree of machine
5065//               instructions that matches a tree of machine-independent
5066//               instructions.
5067// format     -- A string providing the disassembly for this instruction.
5068//               The value of an instruction's operand may be inserted
5069//               by referring to it with a '$' prefix.
5070// opcode     -- Three instruction opcodes may be provided.  These are referred
5071//               to within an encode class as $primary, $secondary, and $tertiary
5072//               respectively.  The primary opcode is commonly used to
5073//               indicate the type of machine instruction, while secondary
5074//               and tertiary are often used for prefix options or addressing
5075//               modes.
5076// ins_encode -- A list of encode classes with parameters. The encode class
5077//               name must have been defined in an 'enc_class' specification
5078//               in the encode section of the architecture description.
5079
5080//----------BSWAP-Instruction--------------------------------------------------
5081instruct bytes_reverse_int(rRegI dst) %{
5082  match(Set dst (ReverseBytesI dst));
5083
5084  format %{ "BSWAP  $dst" %}
5085  opcode(0x0F, 0xC8);
5086  ins_encode( OpcP, OpcSReg(dst) );
5087  ins_pipe( ialu_reg );
5088%}
5089
5090instruct bytes_reverse_long(eRegL dst) %{
5091  match(Set dst (ReverseBytesL dst));
5092
5093  format %{ "BSWAP  $dst.lo\n\t"
5094            "BSWAP  $dst.hi\n\t"
5095            "XCHG   $dst.lo $dst.hi" %}
5096
5097  ins_cost(125);
5098  ins_encode( bswap_long_bytes(dst) );
5099  ins_pipe( ialu_reg_reg);
5100%}
5101
5102instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5103  match(Set dst (ReverseBytesUS dst));
5104  effect(KILL cr);
5105
5106  format %{ "BSWAP  $dst\n\t"
5107            "SHR    $dst,16\n\t" %}
5108  ins_encode %{
5109    __ bswapl($dst$$Register);
5110    __ shrl($dst$$Register, 16);
5111  %}
5112  ins_pipe( ialu_reg );
5113%}
5114
5115instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5116  match(Set dst (ReverseBytesS dst));
5117  effect(KILL cr);
5118
5119  format %{ "BSWAP  $dst\n\t"
5120            "SAR    $dst,16\n\t" %}
5121  ins_encode %{
5122    __ bswapl($dst$$Register);
5123    __ sarl($dst$$Register, 16);
5124  %}
5125  ins_pipe( ialu_reg );
5126%}
5127
5128
5129//---------- Zeros Count Instructions ------------------------------------------
5130
5131instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5132  predicate(UseCountLeadingZerosInstruction);
5133  match(Set dst (CountLeadingZerosI src));
5134  effect(KILL cr);
5135
5136  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5137  ins_encode %{
5138    __ lzcntl($dst$$Register, $src$$Register);
5139  %}
5140  ins_pipe(ialu_reg);
5141%}
5142
5143instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5144  predicate(!UseCountLeadingZerosInstruction);
5145  match(Set dst (CountLeadingZerosI src));
5146  effect(KILL cr);
5147
5148  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5149            "JNZ    skip\n\t"
5150            "MOV    $dst, -1\n"
5151      "skip:\n\t"
5152            "NEG    $dst\n\t"
5153            "ADD    $dst, 31" %}
5154  ins_encode %{
5155    Register Rdst = $dst$$Register;
5156    Register Rsrc = $src$$Register;
5157    Label skip;
5158    __ bsrl(Rdst, Rsrc);
5159    __ jccb(Assembler::notZero, skip);
5160    __ movl(Rdst, -1);
5161    __ bind(skip);
5162    __ negl(Rdst);
5163    __ addl(Rdst, BitsPerInt - 1);
5164  %}
5165  ins_pipe(ialu_reg);
5166%}
5167
5168instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5169  predicate(UseCountLeadingZerosInstruction);
5170  match(Set dst (CountLeadingZerosL src));
5171  effect(TEMP dst, KILL cr);
5172
5173  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5174            "JNC    done\n\t"
5175            "LZCNT  $dst, $src.lo\n\t"
5176            "ADD    $dst, 32\n"
5177      "done:" %}
5178  ins_encode %{
5179    Register Rdst = $dst$$Register;
5180    Register Rsrc = $src$$Register;
5181    Label done;
5182    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5183    __ jccb(Assembler::carryClear, done);
5184    __ lzcntl(Rdst, Rsrc);
5185    __ addl(Rdst, BitsPerInt);
5186    __ bind(done);
5187  %}
5188  ins_pipe(ialu_reg);
5189%}
5190
5191instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5192  predicate(!UseCountLeadingZerosInstruction);
5193  match(Set dst (CountLeadingZerosL src));
5194  effect(TEMP dst, KILL cr);
5195
5196  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5197            "JZ     msw_is_zero\n\t"
5198            "ADD    $dst, 32\n\t"
5199            "JMP    not_zero\n"
5200      "msw_is_zero:\n\t"
5201            "BSR    $dst, $src.lo\n\t"
5202            "JNZ    not_zero\n\t"
5203            "MOV    $dst, -1\n"
5204      "not_zero:\n\t"
5205            "NEG    $dst\n\t"
5206            "ADD    $dst, 63\n" %}
5207 ins_encode %{
5208    Register Rdst = $dst$$Register;
5209    Register Rsrc = $src$$Register;
5210    Label msw_is_zero;
5211    Label not_zero;
5212    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5213    __ jccb(Assembler::zero, msw_is_zero);
5214    __ addl(Rdst, BitsPerInt);
5215    __ jmpb(not_zero);
5216    __ bind(msw_is_zero);
5217    __ bsrl(Rdst, Rsrc);
5218    __ jccb(Assembler::notZero, not_zero);
5219    __ movl(Rdst, -1);
5220    __ bind(not_zero);
5221    __ negl(Rdst);
5222    __ addl(Rdst, BitsPerLong - 1);
5223  %}
5224  ins_pipe(ialu_reg);
5225%}
5226
5227instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5228  predicate(UseCountTrailingZerosInstruction);
5229  match(Set dst (CountTrailingZerosI src));
5230  effect(KILL cr);
5231
5232  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5233  ins_encode %{
5234    __ tzcntl($dst$$Register, $src$$Register);
5235  %}
5236  ins_pipe(ialu_reg);
5237%}
5238
5239instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5240  predicate(!UseCountTrailingZerosInstruction);
5241  match(Set dst (CountTrailingZerosI src));
5242  effect(KILL cr);
5243
5244  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5245            "JNZ    done\n\t"
5246            "MOV    $dst, 32\n"
5247      "done:" %}
5248  ins_encode %{
5249    Register Rdst = $dst$$Register;
5250    Label done;
5251    __ bsfl(Rdst, $src$$Register);
5252    __ jccb(Assembler::notZero, done);
5253    __ movl(Rdst, BitsPerInt);
5254    __ bind(done);
5255  %}
5256  ins_pipe(ialu_reg);
5257%}
5258
5259instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5260  predicate(UseCountTrailingZerosInstruction);
5261  match(Set dst (CountTrailingZerosL src));
5262  effect(TEMP dst, KILL cr);
5263
5264  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5265            "JNC    done\n\t"
5266            "TZCNT  $dst, $src.hi\n\t"
5267            "ADD    $dst, 32\n"
5268            "done:" %}
5269  ins_encode %{
5270    Register Rdst = $dst$$Register;
5271    Register Rsrc = $src$$Register;
5272    Label done;
5273    __ tzcntl(Rdst, Rsrc);
5274    __ jccb(Assembler::carryClear, done);
5275    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5276    __ addl(Rdst, BitsPerInt);
5277    __ bind(done);
5278  %}
5279  ins_pipe(ialu_reg);
5280%}
5281
5282instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5283  predicate(!UseCountTrailingZerosInstruction);
5284  match(Set dst (CountTrailingZerosL src));
5285  effect(TEMP dst, KILL cr);
5286
5287  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5288            "JNZ    done\n\t"
5289            "BSF    $dst, $src.hi\n\t"
5290            "JNZ    msw_not_zero\n\t"
5291            "MOV    $dst, 32\n"
5292      "msw_not_zero:\n\t"
5293            "ADD    $dst, 32\n"
5294      "done:" %}
5295  ins_encode %{
5296    Register Rdst = $dst$$Register;
5297    Register Rsrc = $src$$Register;
5298    Label msw_not_zero;
5299    Label done;
5300    __ bsfl(Rdst, Rsrc);
5301    __ jccb(Assembler::notZero, done);
5302    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5303    __ jccb(Assembler::notZero, msw_not_zero);
5304    __ movl(Rdst, BitsPerInt);
5305    __ bind(msw_not_zero);
5306    __ addl(Rdst, BitsPerInt);
5307    __ bind(done);
5308  %}
5309  ins_pipe(ialu_reg);
5310%}
5311
5312
5313//---------- Population Count Instructions -------------------------------------
5314
5315instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5316  predicate(UsePopCountInstruction);
5317  match(Set dst (PopCountI src));
5318  effect(KILL cr);
5319
5320  format %{ "POPCNT $dst, $src" %}
5321  ins_encode %{
5322    __ popcntl($dst$$Register, $src$$Register);
5323  %}
5324  ins_pipe(ialu_reg);
5325%}
5326
5327instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5328  predicate(UsePopCountInstruction);
5329  match(Set dst (PopCountI (LoadI mem)));
5330  effect(KILL cr);
5331
5332  format %{ "POPCNT $dst, $mem" %}
5333  ins_encode %{
5334    __ popcntl($dst$$Register, $mem$$Address);
5335  %}
5336  ins_pipe(ialu_reg);
5337%}
5338
5339// Note: Long.bitCount(long) returns an int.
5340instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5341  predicate(UsePopCountInstruction);
5342  match(Set dst (PopCountL src));
5343  effect(KILL cr, TEMP tmp, TEMP dst);
5344
5345  format %{ "POPCNT $dst, $src.lo\n\t"
5346            "POPCNT $tmp, $src.hi\n\t"
5347            "ADD    $dst, $tmp" %}
5348  ins_encode %{
5349    __ popcntl($dst$$Register, $src$$Register);
5350    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5351    __ addl($dst$$Register, $tmp$$Register);
5352  %}
5353  ins_pipe(ialu_reg);
5354%}
5355
5356// Note: Long.bitCount(long) returns an int.
5357instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5358  predicate(UsePopCountInstruction);
5359  match(Set dst (PopCountL (LoadL mem)));
5360  effect(KILL cr, TEMP tmp, TEMP dst);
5361
5362  format %{ "POPCNT $dst, $mem\n\t"
5363            "POPCNT $tmp, $mem+4\n\t"
5364            "ADD    $dst, $tmp" %}
5365  ins_encode %{
5366    //__ popcntl($dst$$Register, $mem$$Address$$first);
5367    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5368    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5369    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5370    __ addl($dst$$Register, $tmp$$Register);
5371  %}
5372  ins_pipe(ialu_reg);
5373%}
5374
5375
5376//----------Load/Store/Move Instructions---------------------------------------
5377//----------Load Instructions--------------------------------------------------
5378// Load Byte (8bit signed)
5379instruct loadB(xRegI dst, memory mem) %{
5380  match(Set dst (LoadB mem));
5381
5382  ins_cost(125);
5383  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5384
5385  ins_encode %{
5386    __ movsbl($dst$$Register, $mem$$Address);
5387  %}
5388
5389  ins_pipe(ialu_reg_mem);
5390%}
5391
5392// Load Byte (8bit signed) into Long Register
5393instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5394  match(Set dst (ConvI2L (LoadB mem)));
5395  effect(KILL cr);
5396
5397  ins_cost(375);
5398  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5399            "MOV    $dst.hi,$dst.lo\n\t"
5400            "SAR    $dst.hi,7" %}
5401
5402  ins_encode %{
5403    __ movsbl($dst$$Register, $mem$$Address);
5404    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5405    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5406  %}
5407
5408  ins_pipe(ialu_reg_mem);
5409%}
5410
5411// Load Unsigned Byte (8bit UNsigned)
5412instruct loadUB(xRegI dst, memory mem) %{
5413  match(Set dst (LoadUB mem));
5414
5415  ins_cost(125);
5416  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5417
5418  ins_encode %{
5419    __ movzbl($dst$$Register, $mem$$Address);
5420  %}
5421
5422  ins_pipe(ialu_reg_mem);
5423%}
5424
5425// Load Unsigned Byte (8 bit UNsigned) into Long Register
5426instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5427  match(Set dst (ConvI2L (LoadUB mem)));
5428  effect(KILL cr);
5429
5430  ins_cost(250);
5431  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5432            "XOR    $dst.hi,$dst.hi" %}
5433
5434  ins_encode %{
5435    Register Rdst = $dst$$Register;
5436    __ movzbl(Rdst, $mem$$Address);
5437    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5438  %}
5439
5440  ins_pipe(ialu_reg_mem);
5441%}
5442
5443// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5444instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5445  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5446  effect(KILL cr);
5447
5448  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5449            "XOR    $dst.hi,$dst.hi\n\t"
5450            "AND    $dst.lo,right_n_bits($mask, 8)" %}
5451  ins_encode %{
5452    Register Rdst = $dst$$Register;
5453    __ movzbl(Rdst, $mem$$Address);
5454    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5455    __ andl(Rdst, $mask$$constant & right_n_bits(8));
5456  %}
5457  ins_pipe(ialu_reg_mem);
5458%}
5459
5460// Load Short (16bit signed)
5461instruct loadS(rRegI dst, memory mem) %{
5462  match(Set dst (LoadS mem));
5463
5464  ins_cost(125);
5465  format %{ "MOVSX  $dst,$mem\t# short" %}
5466
5467  ins_encode %{
5468    __ movswl($dst$$Register, $mem$$Address);
5469  %}
5470
5471  ins_pipe(ialu_reg_mem);
5472%}
5473
5474// Load Short (16 bit signed) to Byte (8 bit signed)
5475instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5476  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5477
5478  ins_cost(125);
5479  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5480  ins_encode %{
5481    __ movsbl($dst$$Register, $mem$$Address);
5482  %}
5483  ins_pipe(ialu_reg_mem);
5484%}
5485
5486// Load Short (16bit signed) into Long Register
5487instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5488  match(Set dst (ConvI2L (LoadS mem)));
5489  effect(KILL cr);
5490
5491  ins_cost(375);
5492  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5493            "MOV    $dst.hi,$dst.lo\n\t"
5494            "SAR    $dst.hi,15" %}
5495
5496  ins_encode %{
5497    __ movswl($dst$$Register, $mem$$Address);
5498    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5499    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5500  %}
5501
5502  ins_pipe(ialu_reg_mem);
5503%}
5504
5505// Load Unsigned Short/Char (16bit unsigned)
5506instruct loadUS(rRegI dst, memory mem) %{
5507  match(Set dst (LoadUS mem));
5508
5509  ins_cost(125);
5510  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5511
5512  ins_encode %{
5513    __ movzwl($dst$$Register, $mem$$Address);
5514  %}
5515
5516  ins_pipe(ialu_reg_mem);
5517%}
5518
5519// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5520instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5521  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5522
5523  ins_cost(125);
5524  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5525  ins_encode %{
5526    __ movsbl($dst$$Register, $mem$$Address);
5527  %}
5528  ins_pipe(ialu_reg_mem);
5529%}
5530
5531// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5532instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5533  match(Set dst (ConvI2L (LoadUS mem)));
5534  effect(KILL cr);
5535
5536  ins_cost(250);
5537  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5538            "XOR    $dst.hi,$dst.hi" %}
5539
5540  ins_encode %{
5541    __ movzwl($dst$$Register, $mem$$Address);
5542    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5543  %}
5544
5545  ins_pipe(ialu_reg_mem);
5546%}
5547
5548// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5549instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5550  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5551  effect(KILL cr);
5552
5553  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5554            "XOR    $dst.hi,$dst.hi" %}
5555  ins_encode %{
5556    Register Rdst = $dst$$Register;
5557    __ movzbl(Rdst, $mem$$Address);
5558    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5559  %}
5560  ins_pipe(ialu_reg_mem);
5561%}
5562
5563// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5564instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5565  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5566  effect(KILL cr);
5567
5568  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5569            "XOR    $dst.hi,$dst.hi\n\t"
5570            "AND    $dst.lo,right_n_bits($mask, 16)" %}
5571  ins_encode %{
5572    Register Rdst = $dst$$Register;
5573    __ movzwl(Rdst, $mem$$Address);
5574    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5575    __ andl(Rdst, $mask$$constant & right_n_bits(16));
5576  %}
5577  ins_pipe(ialu_reg_mem);
5578%}
5579
5580// Load Integer
5581instruct loadI(rRegI dst, memory mem) %{
5582  match(Set dst (LoadI mem));
5583
5584  ins_cost(125);
5585  format %{ "MOV    $dst,$mem\t# int" %}
5586
5587  ins_encode %{
5588    __ movl($dst$$Register, $mem$$Address);
5589  %}
5590
5591  ins_pipe(ialu_reg_mem);
5592%}
5593
5594// Load Integer (32 bit signed) to Byte (8 bit signed)
5595instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5596  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5597
5598  ins_cost(125);
5599  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5600  ins_encode %{
5601    __ movsbl($dst$$Register, $mem$$Address);
5602  %}
5603  ins_pipe(ialu_reg_mem);
5604%}
5605
5606// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5607instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5608  match(Set dst (AndI (LoadI mem) mask));
5609
5610  ins_cost(125);
5611  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5612  ins_encode %{
5613    __ movzbl($dst$$Register, $mem$$Address);
5614  %}
5615  ins_pipe(ialu_reg_mem);
5616%}
5617
5618// Load Integer (32 bit signed) to Short (16 bit signed)
5619instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5620  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5621
5622  ins_cost(125);
5623  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5624  ins_encode %{
5625    __ movswl($dst$$Register, $mem$$Address);
5626  %}
5627  ins_pipe(ialu_reg_mem);
5628%}
5629
5630// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5631instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5632  match(Set dst (AndI (LoadI mem) mask));
5633
5634  ins_cost(125);
5635  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5636  ins_encode %{
5637    __ movzwl($dst$$Register, $mem$$Address);
5638  %}
5639  ins_pipe(ialu_reg_mem);
5640%}
5641
5642// Load Integer into Long Register
5643instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5644  match(Set dst (ConvI2L (LoadI mem)));
5645  effect(KILL cr);
5646
5647  ins_cost(375);
5648  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5649            "MOV    $dst.hi,$dst.lo\n\t"
5650            "SAR    $dst.hi,31" %}
5651
5652  ins_encode %{
5653    __ movl($dst$$Register, $mem$$Address);
5654    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5655    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5656  %}
5657
5658  ins_pipe(ialu_reg_mem);
5659%}
5660
5661// Load Integer with mask 0xFF into Long Register
5662instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5663  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5664  effect(KILL cr);
5665
5666  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5667            "XOR    $dst.hi,$dst.hi" %}
5668  ins_encode %{
5669    Register Rdst = $dst$$Register;
5670    __ movzbl(Rdst, $mem$$Address);
5671    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5672  %}
5673  ins_pipe(ialu_reg_mem);
5674%}
5675
5676// Load Integer with mask 0xFFFF into Long Register
5677instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5678  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5679  effect(KILL cr);
5680
5681  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5682            "XOR    $dst.hi,$dst.hi" %}
5683  ins_encode %{
5684    Register Rdst = $dst$$Register;
5685    __ movzwl(Rdst, $mem$$Address);
5686    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5687  %}
5688  ins_pipe(ialu_reg_mem);
5689%}
5690
5691// Load Integer with 31-bit mask into Long Register
5692instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5693  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5694  effect(KILL cr);
5695
5696  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5697            "XOR    $dst.hi,$dst.hi\n\t"
5698            "AND    $dst.lo,$mask" %}
5699  ins_encode %{
5700    Register Rdst = $dst$$Register;
5701    __ movl(Rdst, $mem$$Address);
5702    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5703    __ andl(Rdst, $mask$$constant);
5704  %}
5705  ins_pipe(ialu_reg_mem);
5706%}
5707
5708// Load Unsigned Integer into Long Register
5709instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5710  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5711  effect(KILL cr);
5712
5713  ins_cost(250);
5714  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5715            "XOR    $dst.hi,$dst.hi" %}
5716
5717  ins_encode %{
5718    __ movl($dst$$Register, $mem$$Address);
5719    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5720  %}
5721
5722  ins_pipe(ialu_reg_mem);
5723%}
5724
5725// Load Long.  Cannot clobber address while loading, so restrict address
5726// register to ESI
5727instruct loadL(eRegL dst, load_long_memory mem) %{
5728  predicate(!((LoadLNode*)n)->require_atomic_access());
5729  match(Set dst (LoadL mem));
5730
5731  ins_cost(250);
5732  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5733            "MOV    $dst.hi,$mem+4" %}
5734
5735  ins_encode %{
5736    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5737    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5738    __ movl($dst$$Register, Amemlo);
5739    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5740  %}
5741
5742  ins_pipe(ialu_reg_long_mem);
5743%}
5744
5745// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5746// then store it down to the stack and reload on the int
5747// side.
5748instruct loadL_volatile(stackSlotL dst, memory mem) %{
5749  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5750  match(Set dst (LoadL mem));
5751
5752  ins_cost(200);
5753  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5754            "FISTp  $dst" %}
5755  ins_encode(enc_loadL_volatile(mem,dst));
5756  ins_pipe( fpu_reg_mem );
5757%}
5758
5759instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5760  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5761  match(Set dst (LoadL mem));
5762  effect(TEMP tmp);
5763  ins_cost(180);
5764  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5765            "MOVSD  $dst,$tmp" %}
5766  ins_encode %{
5767    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5768    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5769  %}
5770  ins_pipe( pipe_slow );
5771%}
5772
5773instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5774  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5775  match(Set dst (LoadL mem));
5776  effect(TEMP tmp);
5777  ins_cost(160);
5778  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5779            "MOVD   $dst.lo,$tmp\n\t"
5780            "PSRLQ  $tmp,32\n\t"
5781            "MOVD   $dst.hi,$tmp" %}
5782  ins_encode %{
5783    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5784    __ movdl($dst$$Register, $tmp$$XMMRegister);
5785    __ psrlq($tmp$$XMMRegister, 32);
5786    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5787  %}
5788  ins_pipe( pipe_slow );
5789%}
5790
5791// Load Range
5792instruct loadRange(rRegI dst, memory mem) %{
5793  match(Set dst (LoadRange mem));
5794
5795  ins_cost(125);
5796  format %{ "MOV    $dst,$mem" %}
5797  opcode(0x8B);
5798  ins_encode( OpcP, RegMem(dst,mem));
5799  ins_pipe( ialu_reg_mem );
5800%}
5801
5802
5803// Load Pointer
5804instruct loadP(eRegP dst, memory mem) %{
5805  match(Set dst (LoadP mem));
5806
5807  ins_cost(125);
5808  format %{ "MOV    $dst,$mem" %}
5809  opcode(0x8B);
5810  ins_encode( OpcP, RegMem(dst,mem));
5811  ins_pipe( ialu_reg_mem );
5812%}
5813
5814// Load Klass Pointer
5815instruct loadKlass(eRegP dst, memory mem) %{
5816  match(Set dst (LoadKlass mem));
5817
5818  ins_cost(125);
5819  format %{ "MOV    $dst,$mem" %}
5820  opcode(0x8B);
5821  ins_encode( OpcP, RegMem(dst,mem));
5822  ins_pipe( ialu_reg_mem );
5823%}
5824
5825// Load Double
5826instruct loadDPR(regDPR dst, memory mem) %{
5827  predicate(UseSSE<=1);
5828  match(Set dst (LoadD mem));
5829
5830  ins_cost(150);
5831  format %{ "FLD_D  ST,$mem\n\t"
5832            "FSTP   $dst" %}
5833  opcode(0xDD);               /* DD /0 */
5834  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5835              Pop_Reg_DPR(dst) );
5836  ins_pipe( fpu_reg_mem );
5837%}
5838
5839// Load Double to XMM
5840instruct loadD(regD dst, memory mem) %{
5841  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5842  match(Set dst (LoadD mem));
5843  ins_cost(145);
5844  format %{ "MOVSD  $dst,$mem" %}
5845  ins_encode %{
5846    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5847  %}
5848  ins_pipe( pipe_slow );
5849%}
5850
5851instruct loadD_partial(regD dst, memory mem) %{
5852  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5853  match(Set dst (LoadD mem));
5854  ins_cost(145);
5855  format %{ "MOVLPD $dst,$mem" %}
5856  ins_encode %{
5857    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5858  %}
5859  ins_pipe( pipe_slow );
5860%}
5861
5862// Load to XMM register (single-precision floating point)
5863// MOVSS instruction
5864instruct loadF(regF dst, memory mem) %{
5865  predicate(UseSSE>=1);
5866  match(Set dst (LoadF mem));
5867  ins_cost(145);
5868  format %{ "MOVSS  $dst,$mem" %}
5869  ins_encode %{
5870    __ movflt ($dst$$XMMRegister, $mem$$Address);
5871  %}
5872  ins_pipe( pipe_slow );
5873%}
5874
5875// Load Float
5876instruct loadFPR(regFPR dst, memory mem) %{
5877  predicate(UseSSE==0);
5878  match(Set dst (LoadF mem));
5879
5880  ins_cost(150);
5881  format %{ "FLD_S  ST,$mem\n\t"
5882            "FSTP   $dst" %}
5883  opcode(0xD9);               /* D9 /0 */
5884  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5885              Pop_Reg_FPR(dst) );
5886  ins_pipe( fpu_reg_mem );
5887%}
5888
5889// Load Effective Address
5890instruct leaP8(eRegP dst, indOffset8 mem) %{
5891  match(Set dst mem);
5892
5893  ins_cost(110);
5894  format %{ "LEA    $dst,$mem" %}
5895  opcode(0x8D);
5896  ins_encode( OpcP, RegMem(dst,mem));
5897  ins_pipe( ialu_reg_reg_fat );
5898%}
5899
5900instruct leaP32(eRegP dst, indOffset32 mem) %{
5901  match(Set dst mem);
5902
5903  ins_cost(110);
5904  format %{ "LEA    $dst,$mem" %}
5905  opcode(0x8D);
5906  ins_encode( OpcP, RegMem(dst,mem));
5907  ins_pipe( ialu_reg_reg_fat );
5908%}
5909
5910instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5911  match(Set dst mem);
5912
5913  ins_cost(110);
5914  format %{ "LEA    $dst,$mem" %}
5915  opcode(0x8D);
5916  ins_encode( OpcP, RegMem(dst,mem));
5917  ins_pipe( ialu_reg_reg_fat );
5918%}
5919
5920instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5921  match(Set dst mem);
5922
5923  ins_cost(110);
5924  format %{ "LEA    $dst,$mem" %}
5925  opcode(0x8D);
5926  ins_encode( OpcP, RegMem(dst,mem));
5927  ins_pipe( ialu_reg_reg_fat );
5928%}
5929
5930instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5931  match(Set dst mem);
5932
5933  ins_cost(110);
5934  format %{ "LEA    $dst,$mem" %}
5935  opcode(0x8D);
5936  ins_encode( OpcP, RegMem(dst,mem));
5937  ins_pipe( ialu_reg_reg_fat );
5938%}
5939
5940// Load Constant
5941instruct loadConI(rRegI dst, immI src) %{
5942  match(Set dst src);
5943
5944  format %{ "MOV    $dst,$src" %}
5945  ins_encode( LdImmI(dst, src) );
5946  ins_pipe( ialu_reg_fat );
5947%}
5948
5949// Load Constant zero
5950instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5951  match(Set dst src);
5952  effect(KILL cr);
5953
5954  ins_cost(50);
5955  format %{ "XOR    $dst,$dst" %}
5956  opcode(0x33);  /* + rd */
5957  ins_encode( OpcP, RegReg( dst, dst ) );
5958  ins_pipe( ialu_reg );
5959%}
5960
5961instruct loadConP(eRegP dst, immP src) %{
5962  match(Set dst src);
5963
5964  format %{ "MOV    $dst,$src" %}
5965  opcode(0xB8);  /* + rd */
5966  ins_encode( LdImmP(dst, src) );
5967  ins_pipe( ialu_reg_fat );
5968%}
5969
5970instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5971  match(Set dst src);
5972  effect(KILL cr);
5973  ins_cost(200);
5974  format %{ "MOV    $dst.lo,$src.lo\n\t"
5975            "MOV    $dst.hi,$src.hi" %}
5976  opcode(0xB8);
5977  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5978  ins_pipe( ialu_reg_long_fat );
5979%}
5980
5981instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5982  match(Set dst src);
5983  effect(KILL cr);
5984  ins_cost(150);
5985  format %{ "XOR    $dst.lo,$dst.lo\n\t"
5986            "XOR    $dst.hi,$dst.hi" %}
5987  opcode(0x33,0x33);
5988  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5989  ins_pipe( ialu_reg_long );
5990%}
5991
5992// The instruction usage is guarded by predicate in operand immFPR().
5993instruct loadConFPR(regFPR dst, immFPR con) %{
5994  match(Set dst con);
5995  ins_cost(125);
5996  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5997            "FSTP   $dst" %}
5998  ins_encode %{
5999    __ fld_s($constantaddress($con));
6000    __ fstp_d($dst$$reg);
6001  %}
6002  ins_pipe(fpu_reg_con);
6003%}
6004
6005// The instruction usage is guarded by predicate in operand immFPR0().
6006instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6007  match(Set dst con);
6008  ins_cost(125);
6009  format %{ "FLDZ   ST\n\t"
6010            "FSTP   $dst" %}
6011  ins_encode %{
6012    __ fldz();
6013    __ fstp_d($dst$$reg);
6014  %}
6015  ins_pipe(fpu_reg_con);
6016%}
6017
6018// The instruction usage is guarded by predicate in operand immFPR1().
6019instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6020  match(Set dst con);
6021  ins_cost(125);
6022  format %{ "FLD1   ST\n\t"
6023            "FSTP   $dst" %}
6024  ins_encode %{
6025    __ fld1();
6026    __ fstp_d($dst$$reg);
6027  %}
6028  ins_pipe(fpu_reg_con);
6029%}
6030
6031// The instruction usage is guarded by predicate in operand immF().
6032instruct loadConF(regF dst, immF con) %{
6033  match(Set dst con);
6034  ins_cost(125);
6035  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6036  ins_encode %{
6037    __ movflt($dst$$XMMRegister, $constantaddress($con));
6038  %}
6039  ins_pipe(pipe_slow);
6040%}
6041
6042// The instruction usage is guarded by predicate in operand immF0().
6043instruct loadConF0(regF dst, immF0 src) %{
6044  match(Set dst src);
6045  ins_cost(100);
6046  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6047  ins_encode %{
6048    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6049  %}
6050  ins_pipe(pipe_slow);
6051%}
6052
6053// The instruction usage is guarded by predicate in operand immDPR().
6054instruct loadConDPR(regDPR dst, immDPR con) %{
6055  match(Set dst con);
6056  ins_cost(125);
6057
6058  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6059            "FSTP   $dst" %}
6060  ins_encode %{
6061    __ fld_d($constantaddress($con));
6062    __ fstp_d($dst$$reg);
6063  %}
6064  ins_pipe(fpu_reg_con);
6065%}
6066
6067// The instruction usage is guarded by predicate in operand immDPR0().
6068instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6069  match(Set dst con);
6070  ins_cost(125);
6071
6072  format %{ "FLDZ   ST\n\t"
6073            "FSTP   $dst" %}
6074  ins_encode %{
6075    __ fldz();
6076    __ fstp_d($dst$$reg);
6077  %}
6078  ins_pipe(fpu_reg_con);
6079%}
6080
6081// The instruction usage is guarded by predicate in operand immDPR1().
6082instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6083  match(Set dst con);
6084  ins_cost(125);
6085
6086  format %{ "FLD1   ST\n\t"
6087            "FSTP   $dst" %}
6088  ins_encode %{
6089    __ fld1();
6090    __ fstp_d($dst$$reg);
6091  %}
6092  ins_pipe(fpu_reg_con);
6093%}
6094
6095// The instruction usage is guarded by predicate in operand immD().
6096instruct loadConD(regD dst, immD con) %{
6097  match(Set dst con);
6098  ins_cost(125);
6099  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6100  ins_encode %{
6101    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6102  %}
6103  ins_pipe(pipe_slow);
6104%}
6105
6106// The instruction usage is guarded by predicate in operand immD0().
6107instruct loadConD0(regD dst, immD0 src) %{
6108  match(Set dst src);
6109  ins_cost(100);
6110  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6111  ins_encode %{
6112    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6113  %}
6114  ins_pipe( pipe_slow );
6115%}
6116
6117// Load Stack Slot
6118instruct loadSSI(rRegI dst, stackSlotI src) %{
6119  match(Set dst src);
6120  ins_cost(125);
6121
6122  format %{ "MOV    $dst,$src" %}
6123  opcode(0x8B);
6124  ins_encode( OpcP, RegMem(dst,src));
6125  ins_pipe( ialu_reg_mem );
6126%}
6127
6128instruct loadSSL(eRegL dst, stackSlotL src) %{
6129  match(Set dst src);
6130
6131  ins_cost(200);
6132  format %{ "MOV    $dst,$src.lo\n\t"
6133            "MOV    $dst+4,$src.hi" %}
6134  opcode(0x8B, 0x8B);
6135  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6136  ins_pipe( ialu_mem_long_reg );
6137%}
6138
6139// Load Stack Slot
6140instruct loadSSP(eRegP dst, stackSlotP src) %{
6141  match(Set dst src);
6142  ins_cost(125);
6143
6144  format %{ "MOV    $dst,$src" %}
6145  opcode(0x8B);
6146  ins_encode( OpcP, RegMem(dst,src));
6147  ins_pipe( ialu_reg_mem );
6148%}
6149
6150// Load Stack Slot
6151instruct loadSSF(regFPR dst, stackSlotF src) %{
6152  match(Set dst src);
6153  ins_cost(125);
6154
6155  format %{ "FLD_S  $src\n\t"
6156            "FSTP   $dst" %}
6157  opcode(0xD9);               /* D9 /0, FLD m32real */
6158  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6159              Pop_Reg_FPR(dst) );
6160  ins_pipe( fpu_reg_mem );
6161%}
6162
6163// Load Stack Slot
6164instruct loadSSD(regDPR dst, stackSlotD src) %{
6165  match(Set dst src);
6166  ins_cost(125);
6167
6168  format %{ "FLD_D  $src\n\t"
6169            "FSTP   $dst" %}
6170  opcode(0xDD);               /* DD /0, FLD m64real */
6171  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6172              Pop_Reg_DPR(dst) );
6173  ins_pipe( fpu_reg_mem );
6174%}
6175
6176// Prefetch instructions for allocation.
6177// Must be safe to execute with invalid address (cannot fault).
6178
6179instruct prefetchAlloc0( memory mem ) %{
6180  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6181  match(PrefetchAllocation mem);
6182  ins_cost(0);
6183  size(0);
6184  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6185  ins_encode();
6186  ins_pipe(empty);
6187%}
6188
6189instruct prefetchAlloc( memory mem ) %{
6190  predicate(AllocatePrefetchInstr==3);
6191  match( PrefetchAllocation mem );
6192  ins_cost(100);
6193
6194  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6195  ins_encode %{
6196    __ prefetchw($mem$$Address);
6197  %}
6198  ins_pipe(ialu_mem);
6199%}
6200
6201instruct prefetchAllocNTA( memory mem ) %{
6202  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6203  match(PrefetchAllocation mem);
6204  ins_cost(100);
6205
6206  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6207  ins_encode %{
6208    __ prefetchnta($mem$$Address);
6209  %}
6210  ins_pipe(ialu_mem);
6211%}
6212
6213instruct prefetchAllocT0( memory mem ) %{
6214  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6215  match(PrefetchAllocation mem);
6216  ins_cost(100);
6217
6218  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6219  ins_encode %{
6220    __ prefetcht0($mem$$Address);
6221  %}
6222  ins_pipe(ialu_mem);
6223%}
6224
6225instruct prefetchAllocT2( memory mem ) %{
6226  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6227  match(PrefetchAllocation mem);
6228  ins_cost(100);
6229
6230  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6231  ins_encode %{
6232    __ prefetcht2($mem$$Address);
6233  %}
6234  ins_pipe(ialu_mem);
6235%}
6236
6237//----------Store Instructions-------------------------------------------------
6238
6239// Store Byte
6240instruct storeB(memory mem, xRegI src) %{
6241  match(Set mem (StoreB mem src));
6242
6243  ins_cost(125);
6244  format %{ "MOV8   $mem,$src" %}
6245  opcode(0x88);
6246  ins_encode( OpcP, RegMem( src, mem ) );
6247  ins_pipe( ialu_mem_reg );
6248%}
6249
6250// Store Char/Short
6251instruct storeC(memory mem, rRegI src) %{
6252  match(Set mem (StoreC mem src));
6253
6254  ins_cost(125);
6255  format %{ "MOV16  $mem,$src" %}
6256  opcode(0x89, 0x66);
6257  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6258  ins_pipe( ialu_mem_reg );
6259%}
6260
6261// Store Integer
6262instruct storeI(memory mem, rRegI src) %{
6263  match(Set mem (StoreI mem src));
6264
6265  ins_cost(125);
6266  format %{ "MOV    $mem,$src" %}
6267  opcode(0x89);
6268  ins_encode( OpcP, RegMem( src, mem ) );
6269  ins_pipe( ialu_mem_reg );
6270%}
6271
6272// Store Long
6273instruct storeL(long_memory mem, eRegL src) %{
6274  predicate(!((StoreLNode*)n)->require_atomic_access());
6275  match(Set mem (StoreL mem src));
6276
6277  ins_cost(200);
6278  format %{ "MOV    $mem,$src.lo\n\t"
6279            "MOV    $mem+4,$src.hi" %}
6280  opcode(0x89, 0x89);
6281  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6282  ins_pipe( ialu_mem_long_reg );
6283%}
6284
6285// Store Long to Integer
6286instruct storeL2I(memory mem, eRegL src) %{
6287  match(Set mem (StoreI mem (ConvL2I src)));
6288
6289  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6290  ins_encode %{
6291    __ movl($mem$$Address, $src$$Register);
6292  %}
6293  ins_pipe(ialu_mem_reg);
6294%}
6295
6296// Volatile Store Long.  Must be atomic, so move it into
6297// the FP TOS and then do a 64-bit FIST.  Has to probe the
6298// target address before the store (for null-ptr checks)
6299// so the memory operand is used twice in the encoding.
6300instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6301  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6302  match(Set mem (StoreL mem src));
6303  effect( KILL cr );
6304  ins_cost(400);
6305  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6306            "FILD   $src\n\t"
6307            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6308  opcode(0x3B);
6309  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6310  ins_pipe( fpu_reg_mem );
6311%}
6312
6313instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6314  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6315  match(Set mem (StoreL mem src));
6316  effect( TEMP tmp, KILL cr );
6317  ins_cost(380);
6318  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6319            "MOVSD  $tmp,$src\n\t"
6320            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6321  ins_encode %{
6322    __ cmpl(rax, $mem$$Address);
6323    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6324    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6325  %}
6326  ins_pipe( pipe_slow );
6327%}
6328
6329instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6330  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6331  match(Set mem (StoreL mem src));
6332  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6333  ins_cost(360);
6334  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6335            "MOVD   $tmp,$src.lo\n\t"
6336            "MOVD   $tmp2,$src.hi\n\t"
6337            "PUNPCKLDQ $tmp,$tmp2\n\t"
6338            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6339  ins_encode %{
6340    __ cmpl(rax, $mem$$Address);
6341    __ movdl($tmp$$XMMRegister, $src$$Register);
6342    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6343    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6344    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6345  %}
6346  ins_pipe( pipe_slow );
6347%}
6348
6349// Store Pointer; for storing unknown oops and raw pointers
6350instruct storeP(memory mem, anyRegP src) %{
6351  match(Set mem (StoreP mem src));
6352
6353  ins_cost(125);
6354  format %{ "MOV    $mem,$src" %}
6355  opcode(0x89);
6356  ins_encode( OpcP, RegMem( src, mem ) );
6357  ins_pipe( ialu_mem_reg );
6358%}
6359
6360// Store Integer Immediate
6361instruct storeImmI(memory mem, immI src) %{
6362  match(Set mem (StoreI mem src));
6363
6364  ins_cost(150);
6365  format %{ "MOV    $mem,$src" %}
6366  opcode(0xC7);               /* C7 /0 */
6367  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6368  ins_pipe( ialu_mem_imm );
6369%}
6370
6371// Store Short/Char Immediate
6372instruct storeImmI16(memory mem, immI16 src) %{
6373  predicate(UseStoreImmI16);
6374  match(Set mem (StoreC mem src));
6375
6376  ins_cost(150);
6377  format %{ "MOV16  $mem,$src" %}
6378  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6379  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6380  ins_pipe( ialu_mem_imm );
6381%}
6382
6383// Store Pointer Immediate; null pointers or constant oops that do not
6384// need card-mark barriers.
6385instruct storeImmP(memory mem, immP src) %{
6386  match(Set mem (StoreP mem src));
6387
6388  ins_cost(150);
6389  format %{ "MOV    $mem,$src" %}
6390  opcode(0xC7);               /* C7 /0 */
6391  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6392  ins_pipe( ialu_mem_imm );
6393%}
6394
6395// Store Byte Immediate
6396instruct storeImmB(memory mem, immI8 src) %{
6397  match(Set mem (StoreB mem src));
6398
6399  ins_cost(150);
6400  format %{ "MOV8   $mem,$src" %}
6401  opcode(0xC6);               /* C6 /0 */
6402  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6403  ins_pipe( ialu_mem_imm );
6404%}
6405
6406// Store CMS card-mark Immediate
6407instruct storeImmCM(memory mem, immI8 src) %{
6408  match(Set mem (StoreCM mem src));
6409
6410  ins_cost(150);
6411  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6412  opcode(0xC6);               /* C6 /0 */
6413  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6414  ins_pipe( ialu_mem_imm );
6415%}
6416
6417// Store Double
6418instruct storeDPR( memory mem, regDPR1 src) %{
6419  predicate(UseSSE<=1);
6420  match(Set mem (StoreD mem src));
6421
6422  ins_cost(100);
6423  format %{ "FST_D  $mem,$src" %}
6424  opcode(0xDD);       /* DD /2 */
6425  ins_encode( enc_FPR_store(mem,src) );
6426  ins_pipe( fpu_mem_reg );
6427%}
6428
6429// Store double does rounding on x86
6430instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6431  predicate(UseSSE<=1);
6432  match(Set mem (StoreD mem (RoundDouble src)));
6433
6434  ins_cost(100);
6435  format %{ "FST_D  $mem,$src\t# round" %}
6436  opcode(0xDD);       /* DD /2 */
6437  ins_encode( enc_FPR_store(mem,src) );
6438  ins_pipe( fpu_mem_reg );
6439%}
6440
6441// Store XMM register to memory (double-precision floating points)
6442// MOVSD instruction
6443instruct storeD(memory mem, regD src) %{
6444  predicate(UseSSE>=2);
6445  match(Set mem (StoreD mem src));
6446  ins_cost(95);
6447  format %{ "MOVSD  $mem,$src" %}
6448  ins_encode %{
6449    __ movdbl($mem$$Address, $src$$XMMRegister);
6450  %}
6451  ins_pipe( pipe_slow );
6452%}
6453
6454// Store XMM register to memory (single-precision floating point)
6455// MOVSS instruction
6456instruct storeF(memory mem, regF src) %{
6457  predicate(UseSSE>=1);
6458  match(Set mem (StoreF mem src));
6459  ins_cost(95);
6460  format %{ "MOVSS  $mem,$src" %}
6461  ins_encode %{
6462    __ movflt($mem$$Address, $src$$XMMRegister);
6463  %}
6464  ins_pipe( pipe_slow );
6465%}
6466
6467// Store Float
6468instruct storeFPR( memory mem, regFPR1 src) %{
6469  predicate(UseSSE==0);
6470  match(Set mem (StoreF mem src));
6471
6472  ins_cost(100);
6473  format %{ "FST_S  $mem,$src" %}
6474  opcode(0xD9);       /* D9 /2 */
6475  ins_encode( enc_FPR_store(mem,src) );
6476  ins_pipe( fpu_mem_reg );
6477%}
6478
6479// Store Float does rounding on x86
6480instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6481  predicate(UseSSE==0);
6482  match(Set mem (StoreF mem (RoundFloat src)));
6483
6484  ins_cost(100);
6485  format %{ "FST_S  $mem,$src\t# round" %}
6486  opcode(0xD9);       /* D9 /2 */
6487  ins_encode( enc_FPR_store(mem,src) );
6488  ins_pipe( fpu_mem_reg );
6489%}
6490
6491// Store Float does rounding on x86
6492instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6493  predicate(UseSSE<=1);
6494  match(Set mem (StoreF mem (ConvD2F src)));
6495
6496  ins_cost(100);
6497  format %{ "FST_S  $mem,$src\t# D-round" %}
6498  opcode(0xD9);       /* D9 /2 */
6499  ins_encode( enc_FPR_store(mem,src) );
6500  ins_pipe( fpu_mem_reg );
6501%}
6502
6503// Store immediate Float value (it is faster than store from FPU register)
6504// The instruction usage is guarded by predicate in operand immFPR().
6505instruct storeFPR_imm( memory mem, immFPR src) %{
6506  match(Set mem (StoreF mem src));
6507
6508  ins_cost(50);
6509  format %{ "MOV    $mem,$src\t# store float" %}
6510  opcode(0xC7);               /* C7 /0 */
6511  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6512  ins_pipe( ialu_mem_imm );
6513%}
6514
6515// Store immediate Float value (it is faster than store from XMM register)
6516// The instruction usage is guarded by predicate in operand immF().
6517instruct storeF_imm( memory mem, immF src) %{
6518  match(Set mem (StoreF mem src));
6519
6520  ins_cost(50);
6521  format %{ "MOV    $mem,$src\t# store float" %}
6522  opcode(0xC7);               /* C7 /0 */
6523  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6524  ins_pipe( ialu_mem_imm );
6525%}
6526
6527// Store Integer to stack slot
6528instruct storeSSI(stackSlotI dst, rRegI src) %{
6529  match(Set dst src);
6530
6531  ins_cost(100);
6532  format %{ "MOV    $dst,$src" %}
6533  opcode(0x89);
6534  ins_encode( OpcPRegSS( dst, src ) );
6535  ins_pipe( ialu_mem_reg );
6536%}
6537
6538// Store Integer to stack slot
6539instruct storeSSP(stackSlotP dst, eRegP src) %{
6540  match(Set dst src);
6541
6542  ins_cost(100);
6543  format %{ "MOV    $dst,$src" %}
6544  opcode(0x89);
6545  ins_encode( OpcPRegSS( dst, src ) );
6546  ins_pipe( ialu_mem_reg );
6547%}
6548
6549// Store Long to stack slot
6550instruct storeSSL(stackSlotL dst, eRegL src) %{
6551  match(Set dst src);
6552
6553  ins_cost(200);
6554  format %{ "MOV    $dst,$src.lo\n\t"
6555            "MOV    $dst+4,$src.hi" %}
6556  opcode(0x89, 0x89);
6557  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6558  ins_pipe( ialu_mem_long_reg );
6559%}
6560
6561//----------MemBar Instructions-----------------------------------------------
6562// Memory barrier flavors
6563
6564instruct membar_acquire() %{
6565  match(MemBarAcquire);
6566  match(LoadFence);
6567  ins_cost(400);
6568
6569  size(0);
6570  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6571  ins_encode();
6572  ins_pipe(empty);
6573%}
6574
6575instruct membar_acquire_lock() %{
6576  match(MemBarAcquireLock);
6577  ins_cost(0);
6578
6579  size(0);
6580  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6581  ins_encode( );
6582  ins_pipe(empty);
6583%}
6584
6585instruct membar_release() %{
6586  match(MemBarRelease);
6587  match(StoreFence);
6588  ins_cost(400);
6589
6590  size(0);
6591  format %{ "MEMBAR-release ! (empty encoding)" %}
6592  ins_encode( );
6593  ins_pipe(empty);
6594%}
6595
6596instruct membar_release_lock() %{
6597  match(MemBarReleaseLock);
6598  ins_cost(0);
6599
6600  size(0);
6601  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6602  ins_encode( );
6603  ins_pipe(empty);
6604%}
6605
6606instruct membar_volatile(eFlagsReg cr) %{
6607  match(MemBarVolatile);
6608  effect(KILL cr);
6609  ins_cost(400);
6610
6611  format %{
6612    $$template
6613    if (os::is_MP()) {
6614      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6615    } else {
6616      $$emit$$"MEMBAR-volatile ! (empty encoding)"
6617    }
6618  %}
6619  ins_encode %{
6620    __ membar(Assembler::StoreLoad);
6621  %}
6622  ins_pipe(pipe_slow);
6623%}
6624
6625instruct unnecessary_membar_volatile() %{
6626  match(MemBarVolatile);
6627  predicate(Matcher::post_store_load_barrier(n));
6628  ins_cost(0);
6629
6630  size(0);
6631  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6632  ins_encode( );
6633  ins_pipe(empty);
6634%}
6635
6636instruct membar_storestore() %{
6637  match(MemBarStoreStore);
6638  ins_cost(0);
6639
6640  size(0);
6641  format %{ "MEMBAR-storestore (empty encoding)" %}
6642  ins_encode( );
6643  ins_pipe(empty);
6644%}
6645
6646//----------Move Instructions--------------------------------------------------
6647instruct castX2P(eAXRegP dst, eAXRegI src) %{
6648  match(Set dst (CastX2P src));
6649  format %{ "# X2P  $dst, $src" %}
6650  ins_encode( /*empty encoding*/ );
6651  ins_cost(0);
6652  ins_pipe(empty);
6653%}
6654
6655instruct castP2X(rRegI dst, eRegP src ) %{
6656  match(Set dst (CastP2X src));
6657  ins_cost(50);
6658  format %{ "MOV    $dst, $src\t# CastP2X" %}
6659  ins_encode( enc_Copy( dst, src) );
6660  ins_pipe( ialu_reg_reg );
6661%}
6662
6663//----------Conditional Move---------------------------------------------------
6664// Conditional move
6665instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6666  predicate(!VM_Version::supports_cmov() );
6667  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6668  ins_cost(200);
6669  format %{ "J$cop,us skip\t# signed cmove\n\t"
6670            "MOV    $dst,$src\n"
6671      "skip:" %}
6672  ins_encode %{
6673    Label Lskip;
6674    // Invert sense of branch from sense of CMOV
6675    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6676    __ movl($dst$$Register, $src$$Register);
6677    __ bind(Lskip);
6678  %}
6679  ins_pipe( pipe_cmov_reg );
6680%}
6681
6682instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6683  predicate(!VM_Version::supports_cmov() );
6684  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6685  ins_cost(200);
6686  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6687            "MOV    $dst,$src\n"
6688      "skip:" %}
6689  ins_encode %{
6690    Label Lskip;
6691    // Invert sense of branch from sense of CMOV
6692    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6693    __ movl($dst$$Register, $src$$Register);
6694    __ bind(Lskip);
6695  %}
6696  ins_pipe( pipe_cmov_reg );
6697%}
6698
6699instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6700  predicate(VM_Version::supports_cmov() );
6701  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6702  ins_cost(200);
6703  format %{ "CMOV$cop $dst,$src" %}
6704  opcode(0x0F,0x40);
6705  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6706  ins_pipe( pipe_cmov_reg );
6707%}
6708
6709instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6710  predicate(VM_Version::supports_cmov() );
6711  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6712  ins_cost(200);
6713  format %{ "CMOV$cop $dst,$src" %}
6714  opcode(0x0F,0x40);
6715  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6716  ins_pipe( pipe_cmov_reg );
6717%}
6718
6719instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6720  predicate(VM_Version::supports_cmov() );
6721  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6722  ins_cost(200);
6723  expand %{
6724    cmovI_regU(cop, cr, dst, src);
6725  %}
6726%}
6727
6728// Conditional move
6729instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6730  predicate(VM_Version::supports_cmov() );
6731  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6732  ins_cost(250);
6733  format %{ "CMOV$cop $dst,$src" %}
6734  opcode(0x0F,0x40);
6735  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6736  ins_pipe( pipe_cmov_mem );
6737%}
6738
6739// Conditional move
6740instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6741  predicate(VM_Version::supports_cmov() );
6742  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6743  ins_cost(250);
6744  format %{ "CMOV$cop $dst,$src" %}
6745  opcode(0x0F,0x40);
6746  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6747  ins_pipe( pipe_cmov_mem );
6748%}
6749
6750instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6751  predicate(VM_Version::supports_cmov() );
6752  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6753  ins_cost(250);
6754  expand %{
6755    cmovI_memU(cop, cr, dst, src);
6756  %}
6757%}
6758
6759// Conditional move
6760instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6761  predicate(VM_Version::supports_cmov() );
6762  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6763  ins_cost(200);
6764  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6765  opcode(0x0F,0x40);
6766  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6767  ins_pipe( pipe_cmov_reg );
6768%}
6769
6770// Conditional move (non-P6 version)
6771// Note:  a CMoveP is generated for  stubs and native wrappers
6772//        regardless of whether we are on a P6, so we
6773//        emulate a cmov here
6774instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6775  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6776  ins_cost(300);
6777  format %{ "Jn$cop   skip\n\t"
6778          "MOV    $dst,$src\t# pointer\n"
6779      "skip:" %}
6780  opcode(0x8b);
6781  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6782  ins_pipe( pipe_cmov_reg );
6783%}
6784
6785// Conditional move
6786instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6787  predicate(VM_Version::supports_cmov() );
6788  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6789  ins_cost(200);
6790  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6791  opcode(0x0F,0x40);
6792  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6793  ins_pipe( pipe_cmov_reg );
6794%}
6795
6796instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6797  predicate(VM_Version::supports_cmov() );
6798  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6799  ins_cost(200);
6800  expand %{
6801    cmovP_regU(cop, cr, dst, src);
6802  %}
6803%}
6804
6805// DISABLED: Requires the ADLC to emit a bottom_type call that
6806// correctly meets the two pointer arguments; one is an incoming
6807// register but the other is a memory operand.  ALSO appears to
6808// be buggy with implicit null checks.
6809//
6810//// Conditional move
6811//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6812//  predicate(VM_Version::supports_cmov() );
6813//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6814//  ins_cost(250);
6815//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6816//  opcode(0x0F,0x40);
6817//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6818//  ins_pipe( pipe_cmov_mem );
6819//%}
6820//
6821//// Conditional move
6822//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6823//  predicate(VM_Version::supports_cmov() );
6824//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6825//  ins_cost(250);
6826//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6827//  opcode(0x0F,0x40);
6828//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6829//  ins_pipe( pipe_cmov_mem );
6830//%}
6831
6832// Conditional move
6833instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6834  predicate(UseSSE<=1);
6835  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6836  ins_cost(200);
6837  format %{ "FCMOV$cop $dst,$src\t# double" %}
6838  opcode(0xDA);
6839  ins_encode( enc_cmov_dpr(cop,src) );
6840  ins_pipe( pipe_cmovDPR_reg );
6841%}
6842
6843// Conditional move
6844instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6845  predicate(UseSSE==0);
6846  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6847  ins_cost(200);
6848  format %{ "FCMOV$cop $dst,$src\t# float" %}
6849  opcode(0xDA);
6850  ins_encode( enc_cmov_dpr(cop,src) );
6851  ins_pipe( pipe_cmovDPR_reg );
6852%}
6853
6854// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6855instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6856  predicate(UseSSE<=1);
6857  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6858  ins_cost(200);
6859  format %{ "Jn$cop   skip\n\t"
6860            "MOV    $dst,$src\t# double\n"
6861      "skip:" %}
6862  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6863  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6864  ins_pipe( pipe_cmovDPR_reg );
6865%}
6866
6867// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6868instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6869  predicate(UseSSE==0);
6870  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6871  ins_cost(200);
6872  format %{ "Jn$cop    skip\n\t"
6873            "MOV    $dst,$src\t# float\n"
6874      "skip:" %}
6875  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6876  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6877  ins_pipe( pipe_cmovDPR_reg );
6878%}
6879
6880// No CMOVE with SSE/SSE2
6881instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6882  predicate (UseSSE>=1);
6883  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6884  ins_cost(200);
6885  format %{ "Jn$cop   skip\n\t"
6886            "MOVSS  $dst,$src\t# float\n"
6887      "skip:" %}
6888  ins_encode %{
6889    Label skip;
6890    // Invert sense of branch from sense of CMOV
6891    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6892    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6893    __ bind(skip);
6894  %}
6895  ins_pipe( pipe_slow );
6896%}
6897
6898// No CMOVE with SSE/SSE2
6899instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6900  predicate (UseSSE>=2);
6901  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6902  ins_cost(200);
6903  format %{ "Jn$cop   skip\n\t"
6904            "MOVSD  $dst,$src\t# float\n"
6905      "skip:" %}
6906  ins_encode %{
6907    Label skip;
6908    // Invert sense of branch from sense of CMOV
6909    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6910    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6911    __ bind(skip);
6912  %}
6913  ins_pipe( pipe_slow );
6914%}
6915
6916// unsigned version
6917instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6918  predicate (UseSSE>=1);
6919  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6920  ins_cost(200);
6921  format %{ "Jn$cop   skip\n\t"
6922            "MOVSS  $dst,$src\t# float\n"
6923      "skip:" %}
6924  ins_encode %{
6925    Label skip;
6926    // Invert sense of branch from sense of CMOV
6927    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6928    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6929    __ bind(skip);
6930  %}
6931  ins_pipe( pipe_slow );
6932%}
6933
6934instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6935  predicate (UseSSE>=1);
6936  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6937  ins_cost(200);
6938  expand %{
6939    fcmovF_regU(cop, cr, dst, src);
6940  %}
6941%}
6942
6943// unsigned version
6944instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6945  predicate (UseSSE>=2);
6946  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6947  ins_cost(200);
6948  format %{ "Jn$cop   skip\n\t"
6949            "MOVSD  $dst,$src\t# float\n"
6950      "skip:" %}
6951  ins_encode %{
6952    Label skip;
6953    // Invert sense of branch from sense of CMOV
6954    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6955    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6956    __ bind(skip);
6957  %}
6958  ins_pipe( pipe_slow );
6959%}
6960
6961instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6962  predicate (UseSSE>=2);
6963  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6964  ins_cost(200);
6965  expand %{
6966    fcmovD_regU(cop, cr, dst, src);
6967  %}
6968%}
6969
6970instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6971  predicate(VM_Version::supports_cmov() );
6972  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6973  ins_cost(200);
6974  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6975            "CMOV$cop $dst.hi,$src.hi" %}
6976  opcode(0x0F,0x40);
6977  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6978  ins_pipe( pipe_cmov_reg_long );
6979%}
6980
6981instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6982  predicate(VM_Version::supports_cmov() );
6983  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6984  ins_cost(200);
6985  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6986            "CMOV$cop $dst.hi,$src.hi" %}
6987  opcode(0x0F,0x40);
6988  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6989  ins_pipe( pipe_cmov_reg_long );
6990%}
6991
6992instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6993  predicate(VM_Version::supports_cmov() );
6994  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6995  ins_cost(200);
6996  expand %{
6997    cmovL_regU(cop, cr, dst, src);
6998  %}
6999%}
7000
7001//----------Arithmetic Instructions--------------------------------------------
7002//----------Addition Instructions----------------------------------------------
7003
7004// Integer Addition Instructions
7005instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7006  match(Set dst (AddI dst src));
7007  effect(KILL cr);
7008
7009  size(2);
7010  format %{ "ADD    $dst,$src" %}
7011  opcode(0x03);
7012  ins_encode( OpcP, RegReg( dst, src) );
7013  ins_pipe( ialu_reg_reg );
7014%}
7015
7016instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7017  match(Set dst (AddI dst src));
7018  effect(KILL cr);
7019
7020  format %{ "ADD    $dst,$src" %}
7021  opcode(0x81, 0x00); /* /0 id */
7022  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7023  ins_pipe( ialu_reg );
7024%}
7025
7026instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7027  predicate(UseIncDec);
7028  match(Set dst (AddI dst src));
7029  effect(KILL cr);
7030
7031  size(1);
7032  format %{ "INC    $dst" %}
7033  opcode(0x40); /*  */
7034  ins_encode( Opc_plus( primary, dst ) );
7035  ins_pipe( ialu_reg );
7036%}
7037
7038instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7039  match(Set dst (AddI src0 src1));
7040  ins_cost(110);
7041
7042  format %{ "LEA    $dst,[$src0 + $src1]" %}
7043  opcode(0x8D); /* 0x8D /r */
7044  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7045  ins_pipe( ialu_reg_reg );
7046%}
7047
7048instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7049  match(Set dst (AddP src0 src1));
7050  ins_cost(110);
7051
7052  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7053  opcode(0x8D); /* 0x8D /r */
7054  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7055  ins_pipe( ialu_reg_reg );
7056%}
7057
7058instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7059  predicate(UseIncDec);
7060  match(Set dst (AddI dst src));
7061  effect(KILL cr);
7062
7063  size(1);
7064  format %{ "DEC    $dst" %}
7065  opcode(0x48); /*  */
7066  ins_encode( Opc_plus( primary, dst ) );
7067  ins_pipe( ialu_reg );
7068%}
7069
7070instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7071  match(Set dst (AddP dst src));
7072  effect(KILL cr);
7073
7074  size(2);
7075  format %{ "ADD    $dst,$src" %}
7076  opcode(0x03);
7077  ins_encode( OpcP, RegReg( dst, src) );
7078  ins_pipe( ialu_reg_reg );
7079%}
7080
7081instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7082  match(Set dst (AddP dst src));
7083  effect(KILL cr);
7084
7085  format %{ "ADD    $dst,$src" %}
7086  opcode(0x81,0x00); /* Opcode 81 /0 id */
7087  // ins_encode( RegImm( dst, src) );
7088  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7089  ins_pipe( ialu_reg );
7090%}
7091
7092instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7093  match(Set dst (AddI dst (LoadI src)));
7094  effect(KILL cr);
7095
7096  ins_cost(125);
7097  format %{ "ADD    $dst,$src" %}
7098  opcode(0x03);
7099  ins_encode( OpcP, RegMem( dst, src) );
7100  ins_pipe( ialu_reg_mem );
7101%}
7102
7103instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7104  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7105  effect(KILL cr);
7106
7107  ins_cost(150);
7108  format %{ "ADD    $dst,$src" %}
7109  opcode(0x01);  /* Opcode 01 /r */
7110  ins_encode( OpcP, RegMem( src, dst ) );
7111  ins_pipe( ialu_mem_reg );
7112%}
7113
7114// Add Memory with Immediate
7115instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7116  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7117  effect(KILL cr);
7118
7119  ins_cost(125);
7120  format %{ "ADD    $dst,$src" %}
7121  opcode(0x81);               /* Opcode 81 /0 id */
7122  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7123  ins_pipe( ialu_mem_imm );
7124%}
7125
7126instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7127  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7128  effect(KILL cr);
7129
7130  ins_cost(125);
7131  format %{ "INC    $dst" %}
7132  opcode(0xFF);               /* Opcode FF /0 */
7133  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7134  ins_pipe( ialu_mem_imm );
7135%}
7136
7137instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7138  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7139  effect(KILL cr);
7140
7141  ins_cost(125);
7142  format %{ "DEC    $dst" %}
7143  opcode(0xFF);               /* Opcode FF /1 */
7144  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7145  ins_pipe( ialu_mem_imm );
7146%}
7147
7148
7149instruct checkCastPP( eRegP dst ) %{
7150  match(Set dst (CheckCastPP dst));
7151
7152  size(0);
7153  format %{ "#checkcastPP of $dst" %}
7154  ins_encode( /*empty encoding*/ );
7155  ins_pipe( empty );
7156%}
7157
7158instruct castPP( eRegP dst ) %{
7159  match(Set dst (CastPP dst));
7160  format %{ "#castPP of $dst" %}
7161  ins_encode( /*empty encoding*/ );
7162  ins_pipe( empty );
7163%}
7164
7165instruct castII( rRegI dst ) %{
7166  match(Set dst (CastII dst));
7167  format %{ "#castII of $dst" %}
7168  ins_encode( /*empty encoding*/ );
7169  ins_cost(0);
7170  ins_pipe( empty );
7171%}
7172
7173
7174// Load-locked - same as a regular pointer load when used with compare-swap
7175instruct loadPLocked(eRegP dst, memory mem) %{
7176  match(Set dst (LoadPLocked mem));
7177
7178  ins_cost(125);
7179  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7180  opcode(0x8B);
7181  ins_encode( OpcP, RegMem(dst,mem));
7182  ins_pipe( ialu_reg_mem );
7183%}
7184
7185// Conditional-store of the updated heap-top.
7186// Used during allocation of the shared heap.
7187// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7188instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7189  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7190  // EAX is killed if there is contention, but then it's also unused.
7191  // In the common case of no contention, EAX holds the new oop address.
7192  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7193  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7194  ins_pipe( pipe_cmpxchg );
7195%}
7196
7197// Conditional-store of an int value.
7198// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7199instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7200  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7201  effect(KILL oldval);
7202  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7203  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7204  ins_pipe( pipe_cmpxchg );
7205%}
7206
7207// Conditional-store of a long value.
7208// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7209instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7210  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7211  effect(KILL oldval);
7212  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7213            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7214            "XCHG   EBX,ECX"
7215  %}
7216  ins_encode %{
7217    // Note: we need to swap rbx, and rcx before and after the
7218    //       cmpxchg8 instruction because the instruction uses
7219    //       rcx as the high order word of the new value to store but
7220    //       our register encoding uses rbx.
7221    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7222    if( os::is_MP() )
7223      __ lock();
7224    __ cmpxchg8($mem$$Address);
7225    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7226  %}
7227  ins_pipe( pipe_cmpxchg );
7228%}
7229
7230// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7231
7232instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7233  predicate(VM_Version::supports_cx8());
7234  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7235  effect(KILL cr, KILL oldval);
7236  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7237            "MOV    $res,0\n\t"
7238            "JNE,s  fail\n\t"
7239            "MOV    $res,1\n"
7240          "fail:" %}
7241  ins_encode( enc_cmpxchg8(mem_ptr),
7242              enc_flags_ne_to_boolean(res) );
7243  ins_pipe( pipe_cmpxchg );
7244%}
7245
7246instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7247  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7248  effect(KILL cr, KILL oldval);
7249  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7250            "MOV    $res,0\n\t"
7251            "JNE,s  fail\n\t"
7252            "MOV    $res,1\n"
7253          "fail:" %}
7254  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7255  ins_pipe( pipe_cmpxchg );
7256%}
7257
7258instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7259  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7260  effect(KILL cr, KILL oldval);
7261  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7262            "MOV    $res,0\n\t"
7263            "JNE,s  fail\n\t"
7264            "MOV    $res,1\n"
7265          "fail:" %}
7266  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7267  ins_pipe( pipe_cmpxchg );
7268%}
7269
7270instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7271  predicate(n->as_LoadStore()->result_not_used());
7272  match(Set dummy (GetAndAddI mem add));
7273  effect(KILL cr);
7274  format %{ "ADDL  [$mem],$add" %}
7275  ins_encode %{
7276    if (os::is_MP()) { __ lock(); }
7277    __ addl($mem$$Address, $add$$constant);
7278  %}
7279  ins_pipe( pipe_cmpxchg );
7280%}
7281
7282instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7283  match(Set newval (GetAndAddI mem newval));
7284  effect(KILL cr);
7285  format %{ "XADDL  [$mem],$newval" %}
7286  ins_encode %{
7287    if (os::is_MP()) { __ lock(); }
7288    __ xaddl($mem$$Address, $newval$$Register);
7289  %}
7290  ins_pipe( pipe_cmpxchg );
7291%}
7292
7293instruct xchgI( memory mem, rRegI newval) %{
7294  match(Set newval (GetAndSetI mem newval));
7295  format %{ "XCHGL  $newval,[$mem]" %}
7296  ins_encode %{
7297    __ xchgl($newval$$Register, $mem$$Address);
7298  %}
7299  ins_pipe( pipe_cmpxchg );
7300%}
7301
7302instruct xchgP( memory mem, pRegP newval) %{
7303  match(Set newval (GetAndSetP mem newval));
7304  format %{ "XCHGL  $newval,[$mem]" %}
7305  ins_encode %{
7306    __ xchgl($newval$$Register, $mem$$Address);
7307  %}
7308  ins_pipe( pipe_cmpxchg );
7309%}
7310
7311//----------Subtraction Instructions-------------------------------------------
7312
7313// Integer Subtraction Instructions
7314instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7315  match(Set dst (SubI dst src));
7316  effect(KILL cr);
7317
7318  size(2);
7319  format %{ "SUB    $dst,$src" %}
7320  opcode(0x2B);
7321  ins_encode( OpcP, RegReg( dst, src) );
7322  ins_pipe( ialu_reg_reg );
7323%}
7324
7325instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7326  match(Set dst (SubI dst src));
7327  effect(KILL cr);
7328
7329  format %{ "SUB    $dst,$src" %}
7330  opcode(0x81,0x05);  /* Opcode 81 /5 */
7331  // ins_encode( RegImm( dst, src) );
7332  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7333  ins_pipe( ialu_reg );
7334%}
7335
7336instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7337  match(Set dst (SubI dst (LoadI src)));
7338  effect(KILL cr);
7339
7340  ins_cost(125);
7341  format %{ "SUB    $dst,$src" %}
7342  opcode(0x2B);
7343  ins_encode( OpcP, RegMem( dst, src) );
7344  ins_pipe( ialu_reg_mem );
7345%}
7346
7347instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7348  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7349  effect(KILL cr);
7350
7351  ins_cost(150);
7352  format %{ "SUB    $dst,$src" %}
7353  opcode(0x29);  /* Opcode 29 /r */
7354  ins_encode( OpcP, RegMem( src, dst ) );
7355  ins_pipe( ialu_mem_reg );
7356%}
7357
7358// Subtract from a pointer
7359instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7360  match(Set dst (AddP dst (SubI zero src)));
7361  effect(KILL cr);
7362
7363  size(2);
7364  format %{ "SUB    $dst,$src" %}
7365  opcode(0x2B);
7366  ins_encode( OpcP, RegReg( dst, src) );
7367  ins_pipe( ialu_reg_reg );
7368%}
7369
7370instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7371  match(Set dst (SubI zero dst));
7372  effect(KILL cr);
7373
7374  size(2);
7375  format %{ "NEG    $dst" %}
7376  opcode(0xF7,0x03);  // Opcode F7 /3
7377  ins_encode( OpcP, RegOpc( dst ) );
7378  ins_pipe( ialu_reg );
7379%}
7380
7381//----------Multiplication/Division Instructions-------------------------------
7382// Integer Multiplication Instructions
7383// Multiply Register
7384instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7385  match(Set dst (MulI dst src));
7386  effect(KILL cr);
7387
7388  size(3);
7389  ins_cost(300);
7390  format %{ "IMUL   $dst,$src" %}
7391  opcode(0xAF, 0x0F);
7392  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7393  ins_pipe( ialu_reg_reg_alu0 );
7394%}
7395
7396// Multiply 32-bit Immediate
7397instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7398  match(Set dst (MulI src imm));
7399  effect(KILL cr);
7400
7401  ins_cost(300);
7402  format %{ "IMUL   $dst,$src,$imm" %}
7403  opcode(0x69);  /* 69 /r id */
7404  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7405  ins_pipe( ialu_reg_reg_alu0 );
7406%}
7407
7408instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7409  match(Set dst src);
7410  effect(KILL cr);
7411
7412  // Note that this is artificially increased to make it more expensive than loadConL
7413  ins_cost(250);
7414  format %{ "MOV    EAX,$src\t// low word only" %}
7415  opcode(0xB8);
7416  ins_encode( LdImmL_Lo(dst, src) );
7417  ins_pipe( ialu_reg_fat );
7418%}
7419
7420// Multiply by 32-bit Immediate, taking the shifted high order results
7421//  (special case for shift by 32)
7422instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7423  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7424  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7425             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7426             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7427  effect(USE src1, KILL cr);
7428
7429  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7430  ins_cost(0*100 + 1*400 - 150);
7431  format %{ "IMUL   EDX:EAX,$src1" %}
7432  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7433  ins_pipe( pipe_slow );
7434%}
7435
7436// Multiply by 32-bit Immediate, taking the shifted high order results
7437instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7438  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7439  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7440             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7441             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7442  effect(USE src1, KILL cr);
7443
7444  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7445  ins_cost(1*100 + 1*400 - 150);
7446  format %{ "IMUL   EDX:EAX,$src1\n\t"
7447            "SAR    EDX,$cnt-32" %}
7448  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7449  ins_pipe( pipe_slow );
7450%}
7451
7452// Multiply Memory 32-bit Immediate
7453instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7454  match(Set dst (MulI (LoadI src) imm));
7455  effect(KILL cr);
7456
7457  ins_cost(300);
7458  format %{ "IMUL   $dst,$src,$imm" %}
7459  opcode(0x69);  /* 69 /r id */
7460  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7461  ins_pipe( ialu_reg_mem_alu0 );
7462%}
7463
7464// Multiply Memory
7465instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7466  match(Set dst (MulI dst (LoadI src)));
7467  effect(KILL cr);
7468
7469  ins_cost(350);
7470  format %{ "IMUL   $dst,$src" %}
7471  opcode(0xAF, 0x0F);
7472  ins_encode( OpcS, OpcP, RegMem( dst, src) );
7473  ins_pipe( ialu_reg_mem_alu0 );
7474%}
7475
7476// Multiply Register Int to Long
7477instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7478  // Basic Idea: long = (long)int * (long)int
7479  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7480  effect(DEF dst, USE src, USE src1, KILL flags);
7481
7482  ins_cost(300);
7483  format %{ "IMUL   $dst,$src1" %}
7484
7485  ins_encode( long_int_multiply( dst, src1 ) );
7486  ins_pipe( ialu_reg_reg_alu0 );
7487%}
7488
7489instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7490  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7491  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7492  effect(KILL flags);
7493
7494  ins_cost(300);
7495  format %{ "MUL    $dst,$src1" %}
7496
7497  ins_encode( long_uint_multiply(dst, src1) );
7498  ins_pipe( ialu_reg_reg_alu0 );
7499%}
7500
7501// Multiply Register Long
7502instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7503  match(Set dst (MulL dst src));
7504  effect(KILL cr, TEMP tmp);
7505  ins_cost(4*100+3*400);
7506// Basic idea: lo(result) = lo(x_lo * y_lo)
7507//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7508  format %{ "MOV    $tmp,$src.lo\n\t"
7509            "IMUL   $tmp,EDX\n\t"
7510            "MOV    EDX,$src.hi\n\t"
7511            "IMUL   EDX,EAX\n\t"
7512            "ADD    $tmp,EDX\n\t"
7513            "MUL    EDX:EAX,$src.lo\n\t"
7514            "ADD    EDX,$tmp" %}
7515  ins_encode( long_multiply( dst, src, tmp ) );
7516  ins_pipe( pipe_slow );
7517%}
7518
7519// Multiply Register Long where the left operand's high 32 bits are zero
7520instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7521  predicate(is_operand_hi32_zero(n->in(1)));
7522  match(Set dst (MulL dst src));
7523  effect(KILL cr, TEMP tmp);
7524  ins_cost(2*100+2*400);
7525// Basic idea: lo(result) = lo(x_lo * y_lo)
7526//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7527  format %{ "MOV    $tmp,$src.hi\n\t"
7528            "IMUL   $tmp,EAX\n\t"
7529            "MUL    EDX:EAX,$src.lo\n\t"
7530            "ADD    EDX,$tmp" %}
7531  ins_encode %{
7532    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7533    __ imull($tmp$$Register, rax);
7534    __ mull($src$$Register);
7535    __ addl(rdx, $tmp$$Register);
7536  %}
7537  ins_pipe( pipe_slow );
7538%}
7539
7540// Multiply Register Long where the right operand's high 32 bits are zero
7541instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7542  predicate(is_operand_hi32_zero(n->in(2)));
7543  match(Set dst (MulL dst src));
7544  effect(KILL cr, TEMP tmp);
7545  ins_cost(2*100+2*400);
7546// Basic idea: lo(result) = lo(x_lo * y_lo)
7547//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7548  format %{ "MOV    $tmp,$src.lo\n\t"
7549            "IMUL   $tmp,EDX\n\t"
7550            "MUL    EDX:EAX,$src.lo\n\t"
7551            "ADD    EDX,$tmp" %}
7552  ins_encode %{
7553    __ movl($tmp$$Register, $src$$Register);
7554    __ imull($tmp$$Register, rdx);
7555    __ mull($src$$Register);
7556    __ addl(rdx, $tmp$$Register);
7557  %}
7558  ins_pipe( pipe_slow );
7559%}
7560
7561// Multiply Register Long where the left and the right operands' high 32 bits are zero
7562instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7563  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7564  match(Set dst (MulL dst src));
7565  effect(KILL cr);
7566  ins_cost(1*400);
7567// Basic idea: lo(result) = lo(x_lo * y_lo)
7568//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7569  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7570  ins_encode %{
7571    __ mull($src$$Register);
7572  %}
7573  ins_pipe( pipe_slow );
7574%}
7575
7576// Multiply Register Long by small constant
7577instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7578  match(Set dst (MulL dst src));
7579  effect(KILL cr, TEMP tmp);
7580  ins_cost(2*100+2*400);
7581  size(12);
7582// Basic idea: lo(result) = lo(src * EAX)
7583//             hi(result) = hi(src * EAX) + lo(src * EDX)
7584  format %{ "IMUL   $tmp,EDX,$src\n\t"
7585            "MOV    EDX,$src\n\t"
7586            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7587            "ADD    EDX,$tmp" %}
7588  ins_encode( long_multiply_con( dst, src, tmp ) );
7589  ins_pipe( pipe_slow );
7590%}
7591
7592// Integer DIV with Register
7593instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7594  match(Set rax (DivI rax div));
7595  effect(KILL rdx, KILL cr);
7596  size(26);
7597  ins_cost(30*100+10*100);
7598  format %{ "CMP    EAX,0x80000000\n\t"
7599            "JNE,s  normal\n\t"
7600            "XOR    EDX,EDX\n\t"
7601            "CMP    ECX,-1\n\t"
7602            "JE,s   done\n"
7603    "normal: CDQ\n\t"
7604            "IDIV   $div\n\t"
7605    "done:"        %}
7606  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7607  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7608  ins_pipe( ialu_reg_reg_alu0 );
7609%}
7610
7611// Divide Register Long
7612instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7613  match(Set dst (DivL src1 src2));
7614  effect( KILL cr, KILL cx, KILL bx );
7615  ins_cost(10000);
7616  format %{ "PUSH   $src1.hi\n\t"
7617            "PUSH   $src1.lo\n\t"
7618            "PUSH   $src2.hi\n\t"
7619            "PUSH   $src2.lo\n\t"
7620            "CALL   SharedRuntime::ldiv\n\t"
7621            "ADD    ESP,16" %}
7622  ins_encode( long_div(src1,src2) );
7623  ins_pipe( pipe_slow );
7624%}
7625
7626// Integer DIVMOD with Register, both quotient and mod results
7627instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7628  match(DivModI rax div);
7629  effect(KILL cr);
7630  size(26);
7631  ins_cost(30*100+10*100);
7632  format %{ "CMP    EAX,0x80000000\n\t"
7633            "JNE,s  normal\n\t"
7634            "XOR    EDX,EDX\n\t"
7635            "CMP    ECX,-1\n\t"
7636            "JE,s   done\n"
7637    "normal: CDQ\n\t"
7638            "IDIV   $div\n\t"
7639    "done:"        %}
7640  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7641  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7642  ins_pipe( pipe_slow );
7643%}
7644
7645// Integer MOD with Register
7646instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7647  match(Set rdx (ModI rax div));
7648  effect(KILL rax, KILL cr);
7649
7650  size(26);
7651  ins_cost(300);
7652  format %{ "CDQ\n\t"
7653            "IDIV   $div" %}
7654  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7655  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7656  ins_pipe( ialu_reg_reg_alu0 );
7657%}
7658
7659// Remainder Register Long
7660instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7661  match(Set dst (ModL src1 src2));
7662  effect( KILL cr, KILL cx, KILL bx );
7663  ins_cost(10000);
7664  format %{ "PUSH   $src1.hi\n\t"
7665            "PUSH   $src1.lo\n\t"
7666            "PUSH   $src2.hi\n\t"
7667            "PUSH   $src2.lo\n\t"
7668            "CALL   SharedRuntime::lrem\n\t"
7669            "ADD    ESP,16" %}
7670  ins_encode( long_mod(src1,src2) );
7671  ins_pipe( pipe_slow );
7672%}
7673
7674// Divide Register Long (no special case since divisor != -1)
7675instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7676  match(Set dst (DivL dst imm));
7677  effect( TEMP tmp, TEMP tmp2, KILL cr );
7678  ins_cost(1000);
7679  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7680            "XOR    $tmp2,$tmp2\n\t"
7681            "CMP    $tmp,EDX\n\t"
7682            "JA,s   fast\n\t"
7683            "MOV    $tmp2,EAX\n\t"
7684            "MOV    EAX,EDX\n\t"
7685            "MOV    EDX,0\n\t"
7686            "JLE,s  pos\n\t"
7687            "LNEG   EAX : $tmp2\n\t"
7688            "DIV    $tmp # unsigned division\n\t"
7689            "XCHG   EAX,$tmp2\n\t"
7690            "DIV    $tmp\n\t"
7691            "LNEG   $tmp2 : EAX\n\t"
7692            "JMP,s  done\n"
7693    "pos:\n\t"
7694            "DIV    $tmp\n\t"
7695            "XCHG   EAX,$tmp2\n"
7696    "fast:\n\t"
7697            "DIV    $tmp\n"
7698    "done:\n\t"
7699            "MOV    EDX,$tmp2\n\t"
7700            "NEG    EDX:EAX # if $imm < 0" %}
7701  ins_encode %{
7702    int con = (int)$imm$$constant;
7703    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7704    int pcon = (con > 0) ? con : -con;
7705    Label Lfast, Lpos, Ldone;
7706
7707    __ movl($tmp$$Register, pcon);
7708    __ xorl($tmp2$$Register,$tmp2$$Register);
7709    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7710    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7711
7712    __ movl($tmp2$$Register, $dst$$Register); // save
7713    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7714    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7715    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7716
7717    // Negative dividend.
7718    // convert value to positive to use unsigned division
7719    __ lneg($dst$$Register, $tmp2$$Register);
7720    __ divl($tmp$$Register);
7721    __ xchgl($dst$$Register, $tmp2$$Register);
7722    __ divl($tmp$$Register);
7723    // revert result back to negative
7724    __ lneg($tmp2$$Register, $dst$$Register);
7725    __ jmpb(Ldone);
7726
7727    __ bind(Lpos);
7728    __ divl($tmp$$Register); // Use unsigned division
7729    __ xchgl($dst$$Register, $tmp2$$Register);
7730    // Fallthrow for final divide, tmp2 has 32 bit hi result
7731
7732    __ bind(Lfast);
7733    // fast path: src is positive
7734    __ divl($tmp$$Register); // Use unsigned division
7735
7736    __ bind(Ldone);
7737    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7738    if (con < 0) {
7739      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7740    }
7741  %}
7742  ins_pipe( pipe_slow );
7743%}
7744
7745// Remainder Register Long (remainder fit into 32 bits)
7746instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7747  match(Set dst (ModL dst imm));
7748  effect( TEMP tmp, TEMP tmp2, KILL cr );
7749  ins_cost(1000);
7750  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7751            "CMP    $tmp,EDX\n\t"
7752            "JA,s   fast\n\t"
7753            "MOV    $tmp2,EAX\n\t"
7754            "MOV    EAX,EDX\n\t"
7755            "MOV    EDX,0\n\t"
7756            "JLE,s  pos\n\t"
7757            "LNEG   EAX : $tmp2\n\t"
7758            "DIV    $tmp # unsigned division\n\t"
7759            "MOV    EAX,$tmp2\n\t"
7760            "DIV    $tmp\n\t"
7761            "NEG    EDX\n\t"
7762            "JMP,s  done\n"
7763    "pos:\n\t"
7764            "DIV    $tmp\n\t"
7765            "MOV    EAX,$tmp2\n"
7766    "fast:\n\t"
7767            "DIV    $tmp\n"
7768    "done:\n\t"
7769            "MOV    EAX,EDX\n\t"
7770            "SAR    EDX,31\n\t" %}
7771  ins_encode %{
7772    int con = (int)$imm$$constant;
7773    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7774    int pcon = (con > 0) ? con : -con;
7775    Label  Lfast, Lpos, Ldone;
7776
7777    __ movl($tmp$$Register, pcon);
7778    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7779    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7780
7781    __ movl($tmp2$$Register, $dst$$Register); // save
7782    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7783    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7784    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7785
7786    // Negative dividend.
7787    // convert value to positive to use unsigned division
7788    __ lneg($dst$$Register, $tmp2$$Register);
7789    __ divl($tmp$$Register);
7790    __ movl($dst$$Register, $tmp2$$Register);
7791    __ divl($tmp$$Register);
7792    // revert remainder back to negative
7793    __ negl(HIGH_FROM_LOW($dst$$Register));
7794    __ jmpb(Ldone);
7795
7796    __ bind(Lpos);
7797    __ divl($tmp$$Register);
7798    __ movl($dst$$Register, $tmp2$$Register);
7799
7800    __ bind(Lfast);
7801    // fast path: src is positive
7802    __ divl($tmp$$Register);
7803
7804    __ bind(Ldone);
7805    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7806    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7807
7808  %}
7809  ins_pipe( pipe_slow );
7810%}
7811
7812// Integer Shift Instructions
7813// Shift Left by one
7814instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7815  match(Set dst (LShiftI dst shift));
7816  effect(KILL cr);
7817
7818  size(2);
7819  format %{ "SHL    $dst,$shift" %}
7820  opcode(0xD1, 0x4);  /* D1 /4 */
7821  ins_encode( OpcP, RegOpc( dst ) );
7822  ins_pipe( ialu_reg );
7823%}
7824
7825// Shift Left by 8-bit immediate
7826instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7827  match(Set dst (LShiftI dst shift));
7828  effect(KILL cr);
7829
7830  size(3);
7831  format %{ "SHL    $dst,$shift" %}
7832  opcode(0xC1, 0x4);  /* C1 /4 ib */
7833  ins_encode( RegOpcImm( dst, shift) );
7834  ins_pipe( ialu_reg );
7835%}
7836
7837// Shift Left by variable
7838instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7839  match(Set dst (LShiftI dst shift));
7840  effect(KILL cr);
7841
7842  size(2);
7843  format %{ "SHL    $dst,$shift" %}
7844  opcode(0xD3, 0x4);  /* D3 /4 */
7845  ins_encode( OpcP, RegOpc( dst ) );
7846  ins_pipe( ialu_reg_reg );
7847%}
7848
7849// Arithmetic shift right by one
7850instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7851  match(Set dst (RShiftI dst shift));
7852  effect(KILL cr);
7853
7854  size(2);
7855  format %{ "SAR    $dst,$shift" %}
7856  opcode(0xD1, 0x7);  /* D1 /7 */
7857  ins_encode( OpcP, RegOpc( dst ) );
7858  ins_pipe( ialu_reg );
7859%}
7860
7861// Arithmetic shift right by one
7862instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7863  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7864  effect(KILL cr);
7865  format %{ "SAR    $dst,$shift" %}
7866  opcode(0xD1, 0x7);  /* D1 /7 */
7867  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7868  ins_pipe( ialu_mem_imm );
7869%}
7870
7871// Arithmetic Shift Right by 8-bit immediate
7872instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7873  match(Set dst (RShiftI dst shift));
7874  effect(KILL cr);
7875
7876  size(3);
7877  format %{ "SAR    $dst,$shift" %}
7878  opcode(0xC1, 0x7);  /* C1 /7 ib */
7879  ins_encode( RegOpcImm( dst, shift ) );
7880  ins_pipe( ialu_mem_imm );
7881%}
7882
7883// Arithmetic Shift Right by 8-bit immediate
7884instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7885  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7886  effect(KILL cr);
7887
7888  format %{ "SAR    $dst,$shift" %}
7889  opcode(0xC1, 0x7);  /* C1 /7 ib */
7890  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7891  ins_pipe( ialu_mem_imm );
7892%}
7893
7894// Arithmetic Shift Right by variable
7895instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7896  match(Set dst (RShiftI dst shift));
7897  effect(KILL cr);
7898
7899  size(2);
7900  format %{ "SAR    $dst,$shift" %}
7901  opcode(0xD3, 0x7);  /* D3 /7 */
7902  ins_encode( OpcP, RegOpc( dst ) );
7903  ins_pipe( ialu_reg_reg );
7904%}
7905
7906// Logical shift right by one
7907instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7908  match(Set dst (URShiftI dst shift));
7909  effect(KILL cr);
7910
7911  size(2);
7912  format %{ "SHR    $dst,$shift" %}
7913  opcode(0xD1, 0x5);  /* D1 /5 */
7914  ins_encode( OpcP, RegOpc( dst ) );
7915  ins_pipe( ialu_reg );
7916%}
7917
7918// Logical Shift Right by 8-bit immediate
7919instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7920  match(Set dst (URShiftI dst shift));
7921  effect(KILL cr);
7922
7923  size(3);
7924  format %{ "SHR    $dst,$shift" %}
7925  opcode(0xC1, 0x5);  /* C1 /5 ib */
7926  ins_encode( RegOpcImm( dst, shift) );
7927  ins_pipe( ialu_reg );
7928%}
7929
7930
7931// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7932// This idiom is used by the compiler for the i2b bytecode.
7933instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7934  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7935
7936  size(3);
7937  format %{ "MOVSX  $dst,$src :8" %}
7938  ins_encode %{
7939    __ movsbl($dst$$Register, $src$$Register);
7940  %}
7941  ins_pipe(ialu_reg_reg);
7942%}
7943
7944// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
7945// This idiom is used by the compiler the i2s bytecode.
7946instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7947  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7948
7949  size(3);
7950  format %{ "MOVSX  $dst,$src :16" %}
7951  ins_encode %{
7952    __ movswl($dst$$Register, $src$$Register);
7953  %}
7954  ins_pipe(ialu_reg_reg);
7955%}
7956
7957
7958// Logical Shift Right by variable
7959instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7960  match(Set dst (URShiftI dst shift));
7961  effect(KILL cr);
7962
7963  size(2);
7964  format %{ "SHR    $dst,$shift" %}
7965  opcode(0xD3, 0x5);  /* D3 /5 */
7966  ins_encode( OpcP, RegOpc( dst ) );
7967  ins_pipe( ialu_reg_reg );
7968%}
7969
7970
7971//----------Logical Instructions-----------------------------------------------
7972//----------Integer Logical Instructions---------------------------------------
7973// And Instructions
7974// And Register with Register
7975instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7976  match(Set dst (AndI dst src));
7977  effect(KILL cr);
7978
7979  size(2);
7980  format %{ "AND    $dst,$src" %}
7981  opcode(0x23);
7982  ins_encode( OpcP, RegReg( dst, src) );
7983  ins_pipe( ialu_reg_reg );
7984%}
7985
7986// And Register with Immediate
7987instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7988  match(Set dst (AndI dst src));
7989  effect(KILL cr);
7990
7991  format %{ "AND    $dst,$src" %}
7992  opcode(0x81,0x04);  /* Opcode 81 /4 */
7993  // ins_encode( RegImm( dst, src) );
7994  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7995  ins_pipe( ialu_reg );
7996%}
7997
7998// And Register with Memory
7999instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8000  match(Set dst (AndI dst (LoadI src)));
8001  effect(KILL cr);
8002
8003  ins_cost(125);
8004  format %{ "AND    $dst,$src" %}
8005  opcode(0x23);
8006  ins_encode( OpcP, RegMem( dst, src) );
8007  ins_pipe( ialu_reg_mem );
8008%}
8009
8010// And Memory with Register
8011instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8012  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8013  effect(KILL cr);
8014
8015  ins_cost(150);
8016  format %{ "AND    $dst,$src" %}
8017  opcode(0x21);  /* Opcode 21 /r */
8018  ins_encode( OpcP, RegMem( src, dst ) );
8019  ins_pipe( ialu_mem_reg );
8020%}
8021
8022// And Memory with Immediate
8023instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8024  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8025  effect(KILL cr);
8026
8027  ins_cost(125);
8028  format %{ "AND    $dst,$src" %}
8029  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8030  // ins_encode( MemImm( dst, src) );
8031  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8032  ins_pipe( ialu_mem_imm );
8033%}
8034
8035// BMI1 instructions
8036instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8037  match(Set dst (AndI (XorI src1 minus_1) src2));
8038  predicate(UseBMI1Instructions);
8039  effect(KILL cr);
8040
8041  format %{ "ANDNL  $dst, $src1, $src2" %}
8042
8043  ins_encode %{
8044    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8045  %}
8046  ins_pipe(ialu_reg);
8047%}
8048
8049instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8050  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8051  predicate(UseBMI1Instructions);
8052  effect(KILL cr);
8053
8054  ins_cost(125);
8055  format %{ "ANDNL  $dst, $src1, $src2" %}
8056
8057  ins_encode %{
8058    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8059  %}
8060  ins_pipe(ialu_reg_mem);
8061%}
8062
8063instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8064  match(Set dst (AndI (SubI imm_zero src) src));
8065  predicate(UseBMI1Instructions);
8066  effect(KILL cr);
8067
8068  format %{ "BLSIL  $dst, $src" %}
8069
8070  ins_encode %{
8071    __ blsil($dst$$Register, $src$$Register);
8072  %}
8073  ins_pipe(ialu_reg);
8074%}
8075
8076instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8077  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8078  predicate(UseBMI1Instructions);
8079  effect(KILL cr);
8080
8081  ins_cost(125);
8082  format %{ "BLSIL  $dst, $src" %}
8083
8084  ins_encode %{
8085    __ blsil($dst$$Register, $src$$Address);
8086  %}
8087  ins_pipe(ialu_reg_mem);
8088%}
8089
8090instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8091%{
8092  match(Set dst (XorI (AddI src minus_1) src));
8093  predicate(UseBMI1Instructions);
8094  effect(KILL cr);
8095
8096  format %{ "BLSMSKL $dst, $src" %}
8097
8098  ins_encode %{
8099    __ blsmskl($dst$$Register, $src$$Register);
8100  %}
8101
8102  ins_pipe(ialu_reg);
8103%}
8104
8105instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8106%{
8107  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8108  predicate(UseBMI1Instructions);
8109  effect(KILL cr);
8110
8111  ins_cost(125);
8112  format %{ "BLSMSKL $dst, $src" %}
8113
8114  ins_encode %{
8115    __ blsmskl($dst$$Register, $src$$Address);
8116  %}
8117
8118  ins_pipe(ialu_reg_mem);
8119%}
8120
8121instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8122%{
8123  match(Set dst (AndI (AddI src minus_1) src) );
8124  predicate(UseBMI1Instructions);
8125  effect(KILL cr);
8126
8127  format %{ "BLSRL  $dst, $src" %}
8128
8129  ins_encode %{
8130    __ blsrl($dst$$Register, $src$$Register);
8131  %}
8132
8133  ins_pipe(ialu_reg);
8134%}
8135
8136instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8137%{
8138  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8139  predicate(UseBMI1Instructions);
8140  effect(KILL cr);
8141
8142  ins_cost(125);
8143  format %{ "BLSRL  $dst, $src" %}
8144
8145  ins_encode %{
8146    __ blsrl($dst$$Register, $src$$Address);
8147  %}
8148
8149  ins_pipe(ialu_reg_mem);
8150%}
8151
8152// Or Instructions
8153// Or Register with Register
8154instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8155  match(Set dst (OrI dst src));
8156  effect(KILL cr);
8157
8158  size(2);
8159  format %{ "OR     $dst,$src" %}
8160  opcode(0x0B);
8161  ins_encode( OpcP, RegReg( dst, src) );
8162  ins_pipe( ialu_reg_reg );
8163%}
8164
8165instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8166  match(Set dst (OrI dst (CastP2X src)));
8167  effect(KILL cr);
8168
8169  size(2);
8170  format %{ "OR     $dst,$src" %}
8171  opcode(0x0B);
8172  ins_encode( OpcP, RegReg( dst, src) );
8173  ins_pipe( ialu_reg_reg );
8174%}
8175
8176
8177// Or Register with Immediate
8178instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8179  match(Set dst (OrI dst src));
8180  effect(KILL cr);
8181
8182  format %{ "OR     $dst,$src" %}
8183  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8184  // ins_encode( RegImm( dst, src) );
8185  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8186  ins_pipe( ialu_reg );
8187%}
8188
8189// Or Register with Memory
8190instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8191  match(Set dst (OrI dst (LoadI src)));
8192  effect(KILL cr);
8193
8194  ins_cost(125);
8195  format %{ "OR     $dst,$src" %}
8196  opcode(0x0B);
8197  ins_encode( OpcP, RegMem( dst, src) );
8198  ins_pipe( ialu_reg_mem );
8199%}
8200
8201// Or Memory with Register
8202instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8203  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8204  effect(KILL cr);
8205
8206  ins_cost(150);
8207  format %{ "OR     $dst,$src" %}
8208  opcode(0x09);  /* Opcode 09 /r */
8209  ins_encode( OpcP, RegMem( src, dst ) );
8210  ins_pipe( ialu_mem_reg );
8211%}
8212
8213// Or Memory with Immediate
8214instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8215  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8216  effect(KILL cr);
8217
8218  ins_cost(125);
8219  format %{ "OR     $dst,$src" %}
8220  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8221  // ins_encode( MemImm( dst, src) );
8222  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8223  ins_pipe( ialu_mem_imm );
8224%}
8225
8226// ROL/ROR
8227// ROL expand
8228instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8229  effect(USE_DEF dst, USE shift, KILL cr);
8230
8231  format %{ "ROL    $dst, $shift" %}
8232  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8233  ins_encode( OpcP, RegOpc( dst ));
8234  ins_pipe( ialu_reg );
8235%}
8236
8237instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8238  effect(USE_DEF dst, USE shift, KILL cr);
8239
8240  format %{ "ROL    $dst, $shift" %}
8241  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8242  ins_encode( RegOpcImm(dst, shift) );
8243  ins_pipe(ialu_reg);
8244%}
8245
8246instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8247  effect(USE_DEF dst, USE shift, KILL cr);
8248
8249  format %{ "ROL    $dst, $shift" %}
8250  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8251  ins_encode(OpcP, RegOpc(dst));
8252  ins_pipe( ialu_reg_reg );
8253%}
8254// end of ROL expand
8255
8256// ROL 32bit by one once
8257instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8258  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8259
8260  expand %{
8261    rolI_eReg_imm1(dst, lshift, cr);
8262  %}
8263%}
8264
8265// ROL 32bit var by imm8 once
8266instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8267  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8268  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8269
8270  expand %{
8271    rolI_eReg_imm8(dst, lshift, cr);
8272  %}
8273%}
8274
8275// ROL 32bit var by var once
8276instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8277  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8278
8279  expand %{
8280    rolI_eReg_CL(dst, shift, cr);
8281  %}
8282%}
8283
8284// ROL 32bit var by var once
8285instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8286  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8287
8288  expand %{
8289    rolI_eReg_CL(dst, shift, cr);
8290  %}
8291%}
8292
8293// ROR expand
8294instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8295  effect(USE_DEF dst, USE shift, KILL cr);
8296
8297  format %{ "ROR    $dst, $shift" %}
8298  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8299  ins_encode( OpcP, RegOpc( dst ) );
8300  ins_pipe( ialu_reg );
8301%}
8302
8303instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8304  effect (USE_DEF dst, USE shift, KILL cr);
8305
8306  format %{ "ROR    $dst, $shift" %}
8307  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8308  ins_encode( RegOpcImm(dst, shift) );
8309  ins_pipe( ialu_reg );
8310%}
8311
8312instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8313  effect(USE_DEF dst, USE shift, KILL cr);
8314
8315  format %{ "ROR    $dst, $shift" %}
8316  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8317  ins_encode(OpcP, RegOpc(dst));
8318  ins_pipe( ialu_reg_reg );
8319%}
8320// end of ROR expand
8321
8322// ROR right once
8323instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8324  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8325
8326  expand %{
8327    rorI_eReg_imm1(dst, rshift, cr);
8328  %}
8329%}
8330
8331// ROR 32bit by immI8 once
8332instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8333  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8334  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8335
8336  expand %{
8337    rorI_eReg_imm8(dst, rshift, cr);
8338  %}
8339%}
8340
8341// ROR 32bit var by var once
8342instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8343  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8344
8345  expand %{
8346    rorI_eReg_CL(dst, shift, cr);
8347  %}
8348%}
8349
8350// ROR 32bit var by var once
8351instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8352  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8353
8354  expand %{
8355    rorI_eReg_CL(dst, shift, cr);
8356  %}
8357%}
8358
8359// Xor Instructions
8360// Xor Register with Register
8361instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8362  match(Set dst (XorI dst src));
8363  effect(KILL cr);
8364
8365  size(2);
8366  format %{ "XOR    $dst,$src" %}
8367  opcode(0x33);
8368  ins_encode( OpcP, RegReg( dst, src) );
8369  ins_pipe( ialu_reg_reg );
8370%}
8371
8372// Xor Register with Immediate -1
8373instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8374  match(Set dst (XorI dst imm));
8375
8376  size(2);
8377  format %{ "NOT    $dst" %}
8378  ins_encode %{
8379     __ notl($dst$$Register);
8380  %}
8381  ins_pipe( ialu_reg );
8382%}
8383
8384// Xor Register with Immediate
8385instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8386  match(Set dst (XorI dst src));
8387  effect(KILL cr);
8388
8389  format %{ "XOR    $dst,$src" %}
8390  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8391  // ins_encode( RegImm( dst, src) );
8392  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8393  ins_pipe( ialu_reg );
8394%}
8395
8396// Xor Register with Memory
8397instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8398  match(Set dst (XorI dst (LoadI src)));
8399  effect(KILL cr);
8400
8401  ins_cost(125);
8402  format %{ "XOR    $dst,$src" %}
8403  opcode(0x33);
8404  ins_encode( OpcP, RegMem(dst, src) );
8405  ins_pipe( ialu_reg_mem );
8406%}
8407
8408// Xor Memory with Register
8409instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8410  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8411  effect(KILL cr);
8412
8413  ins_cost(150);
8414  format %{ "XOR    $dst,$src" %}
8415  opcode(0x31);  /* Opcode 31 /r */
8416  ins_encode( OpcP, RegMem( src, dst ) );
8417  ins_pipe( ialu_mem_reg );
8418%}
8419
8420// Xor Memory with Immediate
8421instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8422  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8423  effect(KILL cr);
8424
8425  ins_cost(125);
8426  format %{ "XOR    $dst,$src" %}
8427  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8428  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8429  ins_pipe( ialu_mem_imm );
8430%}
8431
8432//----------Convert Int to Boolean---------------------------------------------
8433
8434instruct movI_nocopy(rRegI dst, rRegI src) %{
8435  effect( DEF dst, USE src );
8436  format %{ "MOV    $dst,$src" %}
8437  ins_encode( enc_Copy( dst, src) );
8438  ins_pipe( ialu_reg_reg );
8439%}
8440
8441instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8442  effect( USE_DEF dst, USE src, KILL cr );
8443
8444  size(4);
8445  format %{ "NEG    $dst\n\t"
8446            "ADC    $dst,$src" %}
8447  ins_encode( neg_reg(dst),
8448              OpcRegReg(0x13,dst,src) );
8449  ins_pipe( ialu_reg_reg_long );
8450%}
8451
8452instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8453  match(Set dst (Conv2B src));
8454
8455  expand %{
8456    movI_nocopy(dst,src);
8457    ci2b(dst,src,cr);
8458  %}
8459%}
8460
8461instruct movP_nocopy(rRegI dst, eRegP src) %{
8462  effect( DEF dst, USE src );
8463  format %{ "MOV    $dst,$src" %}
8464  ins_encode( enc_Copy( dst, src) );
8465  ins_pipe( ialu_reg_reg );
8466%}
8467
8468instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8469  effect( USE_DEF dst, USE src, KILL cr );
8470  format %{ "NEG    $dst\n\t"
8471            "ADC    $dst,$src" %}
8472  ins_encode( neg_reg(dst),
8473              OpcRegReg(0x13,dst,src) );
8474  ins_pipe( ialu_reg_reg_long );
8475%}
8476
8477instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8478  match(Set dst (Conv2B src));
8479
8480  expand %{
8481    movP_nocopy(dst,src);
8482    cp2b(dst,src,cr);
8483  %}
8484%}
8485
8486instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8487  match(Set dst (CmpLTMask p q));
8488  effect(KILL cr);
8489  ins_cost(400);
8490
8491  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8492  format %{ "XOR    $dst,$dst\n\t"
8493            "CMP    $p,$q\n\t"
8494            "SETlt  $dst\n\t"
8495            "NEG    $dst" %}
8496  ins_encode %{
8497    Register Rp = $p$$Register;
8498    Register Rq = $q$$Register;
8499    Register Rd = $dst$$Register;
8500    Label done;
8501    __ xorl(Rd, Rd);
8502    __ cmpl(Rp, Rq);
8503    __ setb(Assembler::less, Rd);
8504    __ negl(Rd);
8505  %}
8506
8507  ins_pipe(pipe_slow);
8508%}
8509
8510instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8511  match(Set dst (CmpLTMask dst zero));
8512  effect(DEF dst, KILL cr);
8513  ins_cost(100);
8514
8515  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8516  ins_encode %{
8517  __ sarl($dst$$Register, 31);
8518  %}
8519  ins_pipe(ialu_reg);
8520%}
8521
8522/* better to save a register than avoid a branch */
8523instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8524  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8525  effect(KILL cr);
8526  ins_cost(400);
8527  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8528            "JGE    done\n\t"
8529            "ADD    $p,$y\n"
8530            "done:  " %}
8531  ins_encode %{
8532    Register Rp = $p$$Register;
8533    Register Rq = $q$$Register;
8534    Register Ry = $y$$Register;
8535    Label done;
8536    __ subl(Rp, Rq);
8537    __ jccb(Assembler::greaterEqual, done);
8538    __ addl(Rp, Ry);
8539    __ bind(done);
8540  %}
8541
8542  ins_pipe(pipe_cmplt);
8543%}
8544
8545/* better to save a register than avoid a branch */
8546instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8547  match(Set y (AndI (CmpLTMask p q) y));
8548  effect(KILL cr);
8549
8550  ins_cost(300);
8551
8552  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8553            "JLT      done\n\t"
8554            "XORL     $y, $y\n"
8555            "done:  " %}
8556  ins_encode %{
8557    Register Rp = $p$$Register;
8558    Register Rq = $q$$Register;
8559    Register Ry = $y$$Register;
8560    Label done;
8561    __ cmpl(Rp, Rq);
8562    __ jccb(Assembler::less, done);
8563    __ xorl(Ry, Ry);
8564    __ bind(done);
8565  %}
8566
8567  ins_pipe(pipe_cmplt);
8568%}
8569
8570/* If I enable this, I encourage spilling in the inner loop of compress.
8571instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8572  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8573*/
8574//----------Overflow Math Instructions-----------------------------------------
8575
8576instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8577%{
8578  match(Set cr (OverflowAddI op1 op2));
8579  effect(DEF cr, USE_KILL op1, USE op2);
8580
8581  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8582
8583  ins_encode %{
8584    __ addl($op1$$Register, $op2$$Register);
8585  %}
8586  ins_pipe(ialu_reg_reg);
8587%}
8588
8589instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8590%{
8591  match(Set cr (OverflowAddI op1 op2));
8592  effect(DEF cr, USE_KILL op1, USE op2);
8593
8594  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8595
8596  ins_encode %{
8597    __ addl($op1$$Register, $op2$$constant);
8598  %}
8599  ins_pipe(ialu_reg_reg);
8600%}
8601
8602instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8603%{
8604  match(Set cr (OverflowSubI op1 op2));
8605
8606  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8607  ins_encode %{
8608    __ cmpl($op1$$Register, $op2$$Register);
8609  %}
8610  ins_pipe(ialu_reg_reg);
8611%}
8612
8613instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8614%{
8615  match(Set cr (OverflowSubI op1 op2));
8616
8617  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8618  ins_encode %{
8619    __ cmpl($op1$$Register, $op2$$constant);
8620  %}
8621  ins_pipe(ialu_reg_reg);
8622%}
8623
8624instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8625%{
8626  match(Set cr (OverflowSubI zero op2));
8627  effect(DEF cr, USE_KILL op2);
8628
8629  format %{ "NEG    $op2\t# overflow check int" %}
8630  ins_encode %{
8631    __ negl($op2$$Register);
8632  %}
8633  ins_pipe(ialu_reg_reg);
8634%}
8635
8636instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8637%{
8638  match(Set cr (OverflowMulI op1 op2));
8639  effect(DEF cr, USE_KILL op1, USE op2);
8640
8641  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8642  ins_encode %{
8643    __ imull($op1$$Register, $op2$$Register);
8644  %}
8645  ins_pipe(ialu_reg_reg_alu0);
8646%}
8647
8648instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8649%{
8650  match(Set cr (OverflowMulI op1 op2));
8651  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8652
8653  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8654  ins_encode %{
8655    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8656  %}
8657  ins_pipe(ialu_reg_reg_alu0);
8658%}
8659
8660//----------Long Instructions------------------------------------------------
8661// Add Long Register with Register
8662instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8663  match(Set dst (AddL dst src));
8664  effect(KILL cr);
8665  ins_cost(200);
8666  format %{ "ADD    $dst.lo,$src.lo\n\t"
8667            "ADC    $dst.hi,$src.hi" %}
8668  opcode(0x03, 0x13);
8669  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8670  ins_pipe( ialu_reg_reg_long );
8671%}
8672
8673// Add Long Register with Immediate
8674instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8675  match(Set dst (AddL dst src));
8676  effect(KILL cr);
8677  format %{ "ADD    $dst.lo,$src.lo\n\t"
8678            "ADC    $dst.hi,$src.hi" %}
8679  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8680  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8681  ins_pipe( ialu_reg_long );
8682%}
8683
8684// Add Long Register with Memory
8685instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8686  match(Set dst (AddL dst (LoadL mem)));
8687  effect(KILL cr);
8688  ins_cost(125);
8689  format %{ "ADD    $dst.lo,$mem\n\t"
8690            "ADC    $dst.hi,$mem+4" %}
8691  opcode(0x03, 0x13);
8692  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8693  ins_pipe( ialu_reg_long_mem );
8694%}
8695
8696// Subtract Long Register with Register.
8697instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8698  match(Set dst (SubL dst src));
8699  effect(KILL cr);
8700  ins_cost(200);
8701  format %{ "SUB    $dst.lo,$src.lo\n\t"
8702            "SBB    $dst.hi,$src.hi" %}
8703  opcode(0x2B, 0x1B);
8704  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8705  ins_pipe( ialu_reg_reg_long );
8706%}
8707
8708// Subtract Long Register with Immediate
8709instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8710  match(Set dst (SubL dst src));
8711  effect(KILL cr);
8712  format %{ "SUB    $dst.lo,$src.lo\n\t"
8713            "SBB    $dst.hi,$src.hi" %}
8714  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8715  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8716  ins_pipe( ialu_reg_long );
8717%}
8718
8719// Subtract Long Register with Memory
8720instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8721  match(Set dst (SubL dst (LoadL mem)));
8722  effect(KILL cr);
8723  ins_cost(125);
8724  format %{ "SUB    $dst.lo,$mem\n\t"
8725            "SBB    $dst.hi,$mem+4" %}
8726  opcode(0x2B, 0x1B);
8727  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8728  ins_pipe( ialu_reg_long_mem );
8729%}
8730
8731instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8732  match(Set dst (SubL zero dst));
8733  effect(KILL cr);
8734  ins_cost(300);
8735  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8736  ins_encode( neg_long(dst) );
8737  ins_pipe( ialu_reg_reg_long );
8738%}
8739
8740// And Long Register with Register
8741instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8742  match(Set dst (AndL dst src));
8743  effect(KILL cr);
8744  format %{ "AND    $dst.lo,$src.lo\n\t"
8745            "AND    $dst.hi,$src.hi" %}
8746  opcode(0x23,0x23);
8747  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8748  ins_pipe( ialu_reg_reg_long );
8749%}
8750
8751// And Long Register with Immediate
8752instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8753  match(Set dst (AndL dst src));
8754  effect(KILL cr);
8755  format %{ "AND    $dst.lo,$src.lo\n\t"
8756            "AND    $dst.hi,$src.hi" %}
8757  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8758  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8759  ins_pipe( ialu_reg_long );
8760%}
8761
8762// And Long Register with Memory
8763instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8764  match(Set dst (AndL dst (LoadL mem)));
8765  effect(KILL cr);
8766  ins_cost(125);
8767  format %{ "AND    $dst.lo,$mem\n\t"
8768            "AND    $dst.hi,$mem+4" %}
8769  opcode(0x23, 0x23);
8770  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8771  ins_pipe( ialu_reg_long_mem );
8772%}
8773
8774// BMI1 instructions
8775instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8776  match(Set dst (AndL (XorL src1 minus_1) src2));
8777  predicate(UseBMI1Instructions);
8778  effect(KILL cr, TEMP dst);
8779
8780  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8781            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8782         %}
8783
8784  ins_encode %{
8785    Register Rdst = $dst$$Register;
8786    Register Rsrc1 = $src1$$Register;
8787    Register Rsrc2 = $src2$$Register;
8788    __ andnl(Rdst, Rsrc1, Rsrc2);
8789    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8790  %}
8791  ins_pipe(ialu_reg_reg_long);
8792%}
8793
8794instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8795  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8796  predicate(UseBMI1Instructions);
8797  effect(KILL cr, TEMP dst);
8798
8799  ins_cost(125);
8800  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8801            "ANDNL  $dst.hi, $src1.hi, $src2+4"
8802         %}
8803
8804  ins_encode %{
8805    Register Rdst = $dst$$Register;
8806    Register Rsrc1 = $src1$$Register;
8807    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8808
8809    __ andnl(Rdst, Rsrc1, $src2$$Address);
8810    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8811  %}
8812  ins_pipe(ialu_reg_mem);
8813%}
8814
8815instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8816  match(Set dst (AndL (SubL imm_zero src) src));
8817  predicate(UseBMI1Instructions);
8818  effect(KILL cr, TEMP dst);
8819
8820  format %{ "MOVL   $dst.hi, 0\n\t"
8821            "BLSIL  $dst.lo, $src.lo\n\t"
8822            "JNZ    done\n\t"
8823            "BLSIL  $dst.hi, $src.hi\n"
8824            "done:"
8825         %}
8826
8827  ins_encode %{
8828    Label done;
8829    Register Rdst = $dst$$Register;
8830    Register Rsrc = $src$$Register;
8831    __ movl(HIGH_FROM_LOW(Rdst), 0);
8832    __ blsil(Rdst, Rsrc);
8833    __ jccb(Assembler::notZero, done);
8834    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8835    __ bind(done);
8836  %}
8837  ins_pipe(ialu_reg);
8838%}
8839
8840instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8841  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8842  predicate(UseBMI1Instructions);
8843  effect(KILL cr, TEMP dst);
8844
8845  ins_cost(125);
8846  format %{ "MOVL   $dst.hi, 0\n\t"
8847            "BLSIL  $dst.lo, $src\n\t"
8848            "JNZ    done\n\t"
8849            "BLSIL  $dst.hi, $src+4\n"
8850            "done:"
8851         %}
8852
8853  ins_encode %{
8854    Label done;
8855    Register Rdst = $dst$$Register;
8856    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8857
8858    __ movl(HIGH_FROM_LOW(Rdst), 0);
8859    __ blsil(Rdst, $src$$Address);
8860    __ jccb(Assembler::notZero, done);
8861    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8862    __ bind(done);
8863  %}
8864  ins_pipe(ialu_reg_mem);
8865%}
8866
8867instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8868%{
8869  match(Set dst (XorL (AddL src minus_1) src));
8870  predicate(UseBMI1Instructions);
8871  effect(KILL cr, TEMP dst);
8872
8873  format %{ "MOVL    $dst.hi, 0\n\t"
8874            "BLSMSKL $dst.lo, $src.lo\n\t"
8875            "JNC     done\n\t"
8876            "BLSMSKL $dst.hi, $src.hi\n"
8877            "done:"
8878         %}
8879
8880  ins_encode %{
8881    Label done;
8882    Register Rdst = $dst$$Register;
8883    Register Rsrc = $src$$Register;
8884    __ movl(HIGH_FROM_LOW(Rdst), 0);
8885    __ blsmskl(Rdst, Rsrc);
8886    __ jccb(Assembler::carryClear, done);
8887    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8888    __ bind(done);
8889  %}
8890
8891  ins_pipe(ialu_reg);
8892%}
8893
8894instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8895%{
8896  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8897  predicate(UseBMI1Instructions);
8898  effect(KILL cr, TEMP dst);
8899
8900  ins_cost(125);
8901  format %{ "MOVL    $dst.hi, 0\n\t"
8902            "BLSMSKL $dst.lo, $src\n\t"
8903            "JNC     done\n\t"
8904            "BLSMSKL $dst.hi, $src+4\n"
8905            "done:"
8906         %}
8907
8908  ins_encode %{
8909    Label done;
8910    Register Rdst = $dst$$Register;
8911    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8912
8913    __ movl(HIGH_FROM_LOW(Rdst), 0);
8914    __ blsmskl(Rdst, $src$$Address);
8915    __ jccb(Assembler::carryClear, done);
8916    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8917    __ bind(done);
8918  %}
8919
8920  ins_pipe(ialu_reg_mem);
8921%}
8922
8923instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8924%{
8925  match(Set dst (AndL (AddL src minus_1) src) );
8926  predicate(UseBMI1Instructions);
8927  effect(KILL cr, TEMP dst);
8928
8929  format %{ "MOVL   $dst.hi, $src.hi\n\t"
8930            "BLSRL  $dst.lo, $src.lo\n\t"
8931            "JNC    done\n\t"
8932            "BLSRL  $dst.hi, $src.hi\n"
8933            "done:"
8934  %}
8935
8936  ins_encode %{
8937    Label done;
8938    Register Rdst = $dst$$Register;
8939    Register Rsrc = $src$$Register;
8940    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8941    __ blsrl(Rdst, Rsrc);
8942    __ jccb(Assembler::carryClear, done);
8943    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8944    __ bind(done);
8945  %}
8946
8947  ins_pipe(ialu_reg);
8948%}
8949
8950instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8951%{
8952  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8953  predicate(UseBMI1Instructions);
8954  effect(KILL cr, TEMP dst);
8955
8956  ins_cost(125);
8957  format %{ "MOVL   $dst.hi, $src+4\n\t"
8958            "BLSRL  $dst.lo, $src\n\t"
8959            "JNC    done\n\t"
8960            "BLSRL  $dst.hi, $src+4\n"
8961            "done:"
8962  %}
8963
8964  ins_encode %{
8965    Label done;
8966    Register Rdst = $dst$$Register;
8967    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8968    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8969    __ blsrl(Rdst, $src$$Address);
8970    __ jccb(Assembler::carryClear, done);
8971    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8972    __ bind(done);
8973  %}
8974
8975  ins_pipe(ialu_reg_mem);
8976%}
8977
8978// Or Long Register with Register
8979instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8980  match(Set dst (OrL dst src));
8981  effect(KILL cr);
8982  format %{ "OR     $dst.lo,$src.lo\n\t"
8983            "OR     $dst.hi,$src.hi" %}
8984  opcode(0x0B,0x0B);
8985  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8986  ins_pipe( ialu_reg_reg_long );
8987%}
8988
8989// Or Long Register with Immediate
8990instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8991  match(Set dst (OrL dst src));
8992  effect(KILL cr);
8993  format %{ "OR     $dst.lo,$src.lo\n\t"
8994            "OR     $dst.hi,$src.hi" %}
8995  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8996  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8997  ins_pipe( ialu_reg_long );
8998%}
8999
9000// Or Long Register with Memory
9001instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9002  match(Set dst (OrL dst (LoadL mem)));
9003  effect(KILL cr);
9004  ins_cost(125);
9005  format %{ "OR     $dst.lo,$mem\n\t"
9006            "OR     $dst.hi,$mem+4" %}
9007  opcode(0x0B,0x0B);
9008  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9009  ins_pipe( ialu_reg_long_mem );
9010%}
9011
9012// Xor Long Register with Register
9013instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9014  match(Set dst (XorL dst src));
9015  effect(KILL cr);
9016  format %{ "XOR    $dst.lo,$src.lo\n\t"
9017            "XOR    $dst.hi,$src.hi" %}
9018  opcode(0x33,0x33);
9019  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9020  ins_pipe( ialu_reg_reg_long );
9021%}
9022
9023// Xor Long Register with Immediate -1
9024instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9025  match(Set dst (XorL dst imm));
9026  format %{ "NOT    $dst.lo\n\t"
9027            "NOT    $dst.hi" %}
9028  ins_encode %{
9029     __ notl($dst$$Register);
9030     __ notl(HIGH_FROM_LOW($dst$$Register));
9031  %}
9032  ins_pipe( ialu_reg_long );
9033%}
9034
9035// Xor Long Register with Immediate
9036instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9037  match(Set dst (XorL dst src));
9038  effect(KILL cr);
9039  format %{ "XOR    $dst.lo,$src.lo\n\t"
9040            "XOR    $dst.hi,$src.hi" %}
9041  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9042  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9043  ins_pipe( ialu_reg_long );
9044%}
9045
9046// Xor Long Register with Memory
9047instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9048  match(Set dst (XorL dst (LoadL mem)));
9049  effect(KILL cr);
9050  ins_cost(125);
9051  format %{ "XOR    $dst.lo,$mem\n\t"
9052            "XOR    $dst.hi,$mem+4" %}
9053  opcode(0x33,0x33);
9054  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9055  ins_pipe( ialu_reg_long_mem );
9056%}
9057
9058// Shift Left Long by 1
9059instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9060  predicate(UseNewLongLShift);
9061  match(Set dst (LShiftL dst cnt));
9062  effect(KILL cr);
9063  ins_cost(100);
9064  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9065            "ADC    $dst.hi,$dst.hi" %}
9066  ins_encode %{
9067    __ addl($dst$$Register,$dst$$Register);
9068    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9069  %}
9070  ins_pipe( ialu_reg_long );
9071%}
9072
9073// Shift Left Long by 2
9074instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9075  predicate(UseNewLongLShift);
9076  match(Set dst (LShiftL dst cnt));
9077  effect(KILL cr);
9078  ins_cost(100);
9079  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9080            "ADC    $dst.hi,$dst.hi\n\t"
9081            "ADD    $dst.lo,$dst.lo\n\t"
9082            "ADC    $dst.hi,$dst.hi" %}
9083  ins_encode %{
9084    __ addl($dst$$Register,$dst$$Register);
9085    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9086    __ addl($dst$$Register,$dst$$Register);
9087    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9088  %}
9089  ins_pipe( ialu_reg_long );
9090%}
9091
9092// Shift Left Long by 3
9093instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9094  predicate(UseNewLongLShift);
9095  match(Set dst (LShiftL dst cnt));
9096  effect(KILL cr);
9097  ins_cost(100);
9098  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9099            "ADC    $dst.hi,$dst.hi\n\t"
9100            "ADD    $dst.lo,$dst.lo\n\t"
9101            "ADC    $dst.hi,$dst.hi\n\t"
9102            "ADD    $dst.lo,$dst.lo\n\t"
9103            "ADC    $dst.hi,$dst.hi" %}
9104  ins_encode %{
9105    __ addl($dst$$Register,$dst$$Register);
9106    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9107    __ addl($dst$$Register,$dst$$Register);
9108    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9109    __ addl($dst$$Register,$dst$$Register);
9110    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9111  %}
9112  ins_pipe( ialu_reg_long );
9113%}
9114
9115// Shift Left Long by 1-31
9116instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9117  match(Set dst (LShiftL dst cnt));
9118  effect(KILL cr);
9119  ins_cost(200);
9120  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9121            "SHL    $dst.lo,$cnt" %}
9122  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9123  ins_encode( move_long_small_shift(dst,cnt) );
9124  ins_pipe( ialu_reg_long );
9125%}
9126
9127// Shift Left Long by 32-63
9128instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9129  match(Set dst (LShiftL dst cnt));
9130  effect(KILL cr);
9131  ins_cost(300);
9132  format %{ "MOV    $dst.hi,$dst.lo\n"
9133          "\tSHL    $dst.hi,$cnt-32\n"
9134          "\tXOR    $dst.lo,$dst.lo" %}
9135  opcode(0xC1, 0x4);  /* C1 /4 ib */
9136  ins_encode( move_long_big_shift_clr(dst,cnt) );
9137  ins_pipe( ialu_reg_long );
9138%}
9139
9140// Shift Left Long by variable
9141instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9142  match(Set dst (LShiftL dst shift));
9143  effect(KILL cr);
9144  ins_cost(500+200);
9145  size(17);
9146  format %{ "TEST   $shift,32\n\t"
9147            "JEQ,s  small\n\t"
9148            "MOV    $dst.hi,$dst.lo\n\t"
9149            "XOR    $dst.lo,$dst.lo\n"
9150    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9151            "SHL    $dst.lo,$shift" %}
9152  ins_encode( shift_left_long( dst, shift ) );
9153  ins_pipe( pipe_slow );
9154%}
9155
9156// Shift Right Long by 1-31
9157instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9158  match(Set dst (URShiftL dst cnt));
9159  effect(KILL cr);
9160  ins_cost(200);
9161  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9162            "SHR    $dst.hi,$cnt" %}
9163  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9164  ins_encode( move_long_small_shift(dst,cnt) );
9165  ins_pipe( ialu_reg_long );
9166%}
9167
9168// Shift Right Long by 32-63
9169instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9170  match(Set dst (URShiftL dst cnt));
9171  effect(KILL cr);
9172  ins_cost(300);
9173  format %{ "MOV    $dst.lo,$dst.hi\n"
9174          "\tSHR    $dst.lo,$cnt-32\n"
9175          "\tXOR    $dst.hi,$dst.hi" %}
9176  opcode(0xC1, 0x5);  /* C1 /5 ib */
9177  ins_encode( move_long_big_shift_clr(dst,cnt) );
9178  ins_pipe( ialu_reg_long );
9179%}
9180
9181// Shift Right Long by variable
9182instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9183  match(Set dst (URShiftL dst shift));
9184  effect(KILL cr);
9185  ins_cost(600);
9186  size(17);
9187  format %{ "TEST   $shift,32\n\t"
9188            "JEQ,s  small\n\t"
9189            "MOV    $dst.lo,$dst.hi\n\t"
9190            "XOR    $dst.hi,$dst.hi\n"
9191    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9192            "SHR    $dst.hi,$shift" %}
9193  ins_encode( shift_right_long( dst, shift ) );
9194  ins_pipe( pipe_slow );
9195%}
9196
9197// Shift Right Long by 1-31
9198instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9199  match(Set dst (RShiftL dst cnt));
9200  effect(KILL cr);
9201  ins_cost(200);
9202  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9203            "SAR    $dst.hi,$cnt" %}
9204  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9205  ins_encode( move_long_small_shift(dst,cnt) );
9206  ins_pipe( ialu_reg_long );
9207%}
9208
9209// Shift Right Long by 32-63
9210instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9211  match(Set dst (RShiftL dst cnt));
9212  effect(KILL cr);
9213  ins_cost(300);
9214  format %{ "MOV    $dst.lo,$dst.hi\n"
9215          "\tSAR    $dst.lo,$cnt-32\n"
9216          "\tSAR    $dst.hi,31" %}
9217  opcode(0xC1, 0x7);  /* C1 /7 ib */
9218  ins_encode( move_long_big_shift_sign(dst,cnt) );
9219  ins_pipe( ialu_reg_long );
9220%}
9221
9222// Shift Right arithmetic Long by variable
9223instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9224  match(Set dst (RShiftL dst shift));
9225  effect(KILL cr);
9226  ins_cost(600);
9227  size(18);
9228  format %{ "TEST   $shift,32\n\t"
9229            "JEQ,s  small\n\t"
9230            "MOV    $dst.lo,$dst.hi\n\t"
9231            "SAR    $dst.hi,31\n"
9232    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9233            "SAR    $dst.hi,$shift" %}
9234  ins_encode( shift_right_arith_long( dst, shift ) );
9235  ins_pipe( pipe_slow );
9236%}
9237
9238
9239//----------Double Instructions------------------------------------------------
9240// Double Math
9241
9242// Compare & branch
9243
9244// P6 version of float compare, sets condition codes in EFLAGS
9245instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9246  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9247  match(Set cr (CmpD src1 src2));
9248  effect(KILL rax);
9249  ins_cost(150);
9250  format %{ "FLD    $src1\n\t"
9251            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9252            "JNP    exit\n\t"
9253            "MOV    ah,1       // saw a NaN, set CF\n\t"
9254            "SAHF\n"
9255     "exit:\tNOP               // avoid branch to branch" %}
9256  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9257  ins_encode( Push_Reg_DPR(src1),
9258              OpcP, RegOpc(src2),
9259              cmpF_P6_fixup );
9260  ins_pipe( pipe_slow );
9261%}
9262
9263instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9264  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9265  match(Set cr (CmpD src1 src2));
9266  ins_cost(150);
9267  format %{ "FLD    $src1\n\t"
9268            "FUCOMIP ST,$src2  // P6 instruction" %}
9269  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9270  ins_encode( Push_Reg_DPR(src1),
9271              OpcP, RegOpc(src2));
9272  ins_pipe( pipe_slow );
9273%}
9274
9275// Compare & branch
9276instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9277  predicate(UseSSE<=1);
9278  match(Set cr (CmpD src1 src2));
9279  effect(KILL rax);
9280  ins_cost(200);
9281  format %{ "FLD    $src1\n\t"
9282            "FCOMp  $src2\n\t"
9283            "FNSTSW AX\n\t"
9284            "TEST   AX,0x400\n\t"
9285            "JZ,s   flags\n\t"
9286            "MOV    AH,1\t# unordered treat as LT\n"
9287    "flags:\tSAHF" %}
9288  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9289  ins_encode( Push_Reg_DPR(src1),
9290              OpcP, RegOpc(src2),
9291              fpu_flags);
9292  ins_pipe( pipe_slow );
9293%}
9294
9295// Compare vs zero into -1,0,1
9296instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9297  predicate(UseSSE<=1);
9298  match(Set dst (CmpD3 src1 zero));
9299  effect(KILL cr, KILL rax);
9300  ins_cost(280);
9301  format %{ "FTSTD  $dst,$src1" %}
9302  opcode(0xE4, 0xD9);
9303  ins_encode( Push_Reg_DPR(src1),
9304              OpcS, OpcP, PopFPU,
9305              CmpF_Result(dst));
9306  ins_pipe( pipe_slow );
9307%}
9308
9309// Compare into -1,0,1
9310instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9311  predicate(UseSSE<=1);
9312  match(Set dst (CmpD3 src1 src2));
9313  effect(KILL cr, KILL rax);
9314  ins_cost(300);
9315  format %{ "FCMPD  $dst,$src1,$src2" %}
9316  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9317  ins_encode( Push_Reg_DPR(src1),
9318              OpcP, RegOpc(src2),
9319              CmpF_Result(dst));
9320  ins_pipe( pipe_slow );
9321%}
9322
9323// float compare and set condition codes in EFLAGS by XMM regs
9324instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9325  predicate(UseSSE>=2);
9326  match(Set cr (CmpD src1 src2));
9327  ins_cost(145);
9328  format %{ "UCOMISD $src1,$src2\n\t"
9329            "JNP,s   exit\n\t"
9330            "PUSHF\t# saw NaN, set CF\n\t"
9331            "AND     [rsp], #0xffffff2b\n\t"
9332            "POPF\n"
9333    "exit:" %}
9334  ins_encode %{
9335    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9336    emit_cmpfp_fixup(_masm);
9337  %}
9338  ins_pipe( pipe_slow );
9339%}
9340
9341instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9342  predicate(UseSSE>=2);
9343  match(Set cr (CmpD src1 src2));
9344  ins_cost(100);
9345  format %{ "UCOMISD $src1,$src2" %}
9346  ins_encode %{
9347    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9348  %}
9349  ins_pipe( pipe_slow );
9350%}
9351
9352// float compare and set condition codes in EFLAGS by XMM regs
9353instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9354  predicate(UseSSE>=2);
9355  match(Set cr (CmpD src1 (LoadD src2)));
9356  ins_cost(145);
9357  format %{ "UCOMISD $src1,$src2\n\t"
9358            "JNP,s   exit\n\t"
9359            "PUSHF\t# saw NaN, set CF\n\t"
9360            "AND     [rsp], #0xffffff2b\n\t"
9361            "POPF\n"
9362    "exit:" %}
9363  ins_encode %{
9364    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9365    emit_cmpfp_fixup(_masm);
9366  %}
9367  ins_pipe( pipe_slow );
9368%}
9369
9370instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9371  predicate(UseSSE>=2);
9372  match(Set cr (CmpD src1 (LoadD src2)));
9373  ins_cost(100);
9374  format %{ "UCOMISD $src1,$src2" %}
9375  ins_encode %{
9376    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9377  %}
9378  ins_pipe( pipe_slow );
9379%}
9380
9381// Compare into -1,0,1 in XMM
9382instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9383  predicate(UseSSE>=2);
9384  match(Set dst (CmpD3 src1 src2));
9385  effect(KILL cr);
9386  ins_cost(255);
9387  format %{ "UCOMISD $src1, $src2\n\t"
9388            "MOV     $dst, #-1\n\t"
9389            "JP,s    done\n\t"
9390            "JB,s    done\n\t"
9391            "SETNE   $dst\n\t"
9392            "MOVZB   $dst, $dst\n"
9393    "done:" %}
9394  ins_encode %{
9395    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9396    emit_cmpfp3(_masm, $dst$$Register);
9397  %}
9398  ins_pipe( pipe_slow );
9399%}
9400
9401// Compare into -1,0,1 in XMM and memory
9402instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9403  predicate(UseSSE>=2);
9404  match(Set dst (CmpD3 src1 (LoadD src2)));
9405  effect(KILL cr);
9406  ins_cost(275);
9407  format %{ "UCOMISD $src1, $src2\n\t"
9408            "MOV     $dst, #-1\n\t"
9409            "JP,s    done\n\t"
9410            "JB,s    done\n\t"
9411            "SETNE   $dst\n\t"
9412            "MOVZB   $dst, $dst\n"
9413    "done:" %}
9414  ins_encode %{
9415    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9416    emit_cmpfp3(_masm, $dst$$Register);
9417  %}
9418  ins_pipe( pipe_slow );
9419%}
9420
9421
9422instruct subDPR_reg(regDPR dst, regDPR src) %{
9423  predicate (UseSSE <=1);
9424  match(Set dst (SubD dst src));
9425
9426  format %{ "FLD    $src\n\t"
9427            "DSUBp  $dst,ST" %}
9428  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9429  ins_cost(150);
9430  ins_encode( Push_Reg_DPR(src),
9431              OpcP, RegOpc(dst) );
9432  ins_pipe( fpu_reg_reg );
9433%}
9434
9435instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9436  predicate (UseSSE <=1);
9437  match(Set dst (RoundDouble (SubD src1 src2)));
9438  ins_cost(250);
9439
9440  format %{ "FLD    $src2\n\t"
9441            "DSUB   ST,$src1\n\t"
9442            "FSTP_D $dst\t# D-round" %}
9443  opcode(0xD8, 0x5);
9444  ins_encode( Push_Reg_DPR(src2),
9445              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9446  ins_pipe( fpu_mem_reg_reg );
9447%}
9448
9449
9450instruct subDPR_reg_mem(regDPR dst, memory src) %{
9451  predicate (UseSSE <=1);
9452  match(Set dst (SubD dst (LoadD src)));
9453  ins_cost(150);
9454
9455  format %{ "FLD    $src\n\t"
9456            "DSUBp  $dst,ST" %}
9457  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9458  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9459              OpcP, RegOpc(dst) );
9460  ins_pipe( fpu_reg_mem );
9461%}
9462
9463instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9464  predicate (UseSSE<=1);
9465  match(Set dst (AbsD src));
9466  ins_cost(100);
9467  format %{ "FABS" %}
9468  opcode(0xE1, 0xD9);
9469  ins_encode( OpcS, OpcP );
9470  ins_pipe( fpu_reg_reg );
9471%}
9472
9473instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9474  predicate(UseSSE<=1);
9475  match(Set dst (NegD src));
9476  ins_cost(100);
9477  format %{ "FCHS" %}
9478  opcode(0xE0, 0xD9);
9479  ins_encode( OpcS, OpcP );
9480  ins_pipe( fpu_reg_reg );
9481%}
9482
9483instruct addDPR_reg(regDPR dst, regDPR src) %{
9484  predicate(UseSSE<=1);
9485  match(Set dst (AddD dst src));
9486  format %{ "FLD    $src\n\t"
9487            "DADD   $dst,ST" %}
9488  size(4);
9489  ins_cost(150);
9490  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9491  ins_encode( Push_Reg_DPR(src),
9492              OpcP, RegOpc(dst) );
9493  ins_pipe( fpu_reg_reg );
9494%}
9495
9496
9497instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9498  predicate(UseSSE<=1);
9499  match(Set dst (RoundDouble (AddD src1 src2)));
9500  ins_cost(250);
9501
9502  format %{ "FLD    $src2\n\t"
9503            "DADD   ST,$src1\n\t"
9504            "FSTP_D $dst\t# D-round" %}
9505  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9506  ins_encode( Push_Reg_DPR(src2),
9507              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9508  ins_pipe( fpu_mem_reg_reg );
9509%}
9510
9511
9512instruct addDPR_reg_mem(regDPR dst, memory src) %{
9513  predicate(UseSSE<=1);
9514  match(Set dst (AddD dst (LoadD src)));
9515  ins_cost(150);
9516
9517  format %{ "FLD    $src\n\t"
9518            "DADDp  $dst,ST" %}
9519  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9520  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9521              OpcP, RegOpc(dst) );
9522  ins_pipe( fpu_reg_mem );
9523%}
9524
9525// add-to-memory
9526instruct addDPR_mem_reg(memory dst, regDPR src) %{
9527  predicate(UseSSE<=1);
9528  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9529  ins_cost(150);
9530
9531  format %{ "FLD_D  $dst\n\t"
9532            "DADD   ST,$src\n\t"
9533            "FST_D  $dst" %}
9534  opcode(0xDD, 0x0);
9535  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9536              Opcode(0xD8), RegOpc(src),
9537              set_instruction_start,
9538              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9539  ins_pipe( fpu_reg_mem );
9540%}
9541
9542instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9543  predicate(UseSSE<=1);
9544  match(Set dst (AddD dst con));
9545  ins_cost(125);
9546  format %{ "FLD1\n\t"
9547            "DADDp  $dst,ST" %}
9548  ins_encode %{
9549    __ fld1();
9550    __ faddp($dst$$reg);
9551  %}
9552  ins_pipe(fpu_reg);
9553%}
9554
9555instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9556  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9557  match(Set dst (AddD dst con));
9558  ins_cost(200);
9559  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9560            "DADDp  $dst,ST" %}
9561  ins_encode %{
9562    __ fld_d($constantaddress($con));
9563    __ faddp($dst$$reg);
9564  %}
9565  ins_pipe(fpu_reg_mem);
9566%}
9567
9568instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9569  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9570  match(Set dst (RoundDouble (AddD src con)));
9571  ins_cost(200);
9572  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9573            "DADD   ST,$src\n\t"
9574            "FSTP_D $dst\t# D-round" %}
9575  ins_encode %{
9576    __ fld_d($constantaddress($con));
9577    __ fadd($src$$reg);
9578    __ fstp_d(Address(rsp, $dst$$disp));
9579  %}
9580  ins_pipe(fpu_mem_reg_con);
9581%}
9582
9583instruct mulDPR_reg(regDPR dst, regDPR src) %{
9584  predicate(UseSSE<=1);
9585  match(Set dst (MulD dst src));
9586  format %{ "FLD    $src\n\t"
9587            "DMULp  $dst,ST" %}
9588  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9589  ins_cost(150);
9590  ins_encode( Push_Reg_DPR(src),
9591              OpcP, RegOpc(dst) );
9592  ins_pipe( fpu_reg_reg );
9593%}
9594
9595// Strict FP instruction biases argument before multiply then
9596// biases result to avoid double rounding of subnormals.
9597//
9598// scale arg1 by multiplying arg1 by 2^(-15360)
9599// load arg2
9600// multiply scaled arg1 by arg2
9601// rescale product by 2^(15360)
9602//
9603instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9604  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9605  match(Set dst (MulD dst src));
9606  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9607
9608  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9609            "DMULp  $dst,ST\n\t"
9610            "FLD    $src\n\t"
9611            "DMULp  $dst,ST\n\t"
9612            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9613            "DMULp  $dst,ST\n\t" %}
9614  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9615  ins_encode( strictfp_bias1(dst),
9616              Push_Reg_DPR(src),
9617              OpcP, RegOpc(dst),
9618              strictfp_bias2(dst) );
9619  ins_pipe( fpu_reg_reg );
9620%}
9621
9622instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9623  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9624  match(Set dst (MulD dst con));
9625  ins_cost(200);
9626  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9627            "DMULp  $dst,ST" %}
9628  ins_encode %{
9629    __ fld_d($constantaddress($con));
9630    __ fmulp($dst$$reg);
9631  %}
9632  ins_pipe(fpu_reg_mem);
9633%}
9634
9635
9636instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9637  predicate( UseSSE<=1 );
9638  match(Set dst (MulD dst (LoadD src)));
9639  ins_cost(200);
9640  format %{ "FLD_D  $src\n\t"
9641            "DMULp  $dst,ST" %}
9642  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9643  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9644              OpcP, RegOpc(dst) );
9645  ins_pipe( fpu_reg_mem );
9646%}
9647
9648//
9649// Cisc-alternate to reg-reg multiply
9650instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9651  predicate( UseSSE<=1 );
9652  match(Set dst (MulD src (LoadD mem)));
9653  ins_cost(250);
9654  format %{ "FLD_D  $mem\n\t"
9655            "DMUL   ST,$src\n\t"
9656            "FSTP_D $dst" %}
9657  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9658  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9659              OpcReg_FPR(src),
9660              Pop_Reg_DPR(dst) );
9661  ins_pipe( fpu_reg_reg_mem );
9662%}
9663
9664
9665// MACRO3 -- addDPR a mulDPR
9666// This instruction is a '2-address' instruction in that the result goes
9667// back to src2.  This eliminates a move from the macro; possibly the
9668// register allocator will have to add it back (and maybe not).
9669instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9670  predicate( UseSSE<=1 );
9671  match(Set src2 (AddD (MulD src0 src1) src2));
9672  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9673            "DMUL   ST,$src1\n\t"
9674            "DADDp  $src2,ST" %}
9675  ins_cost(250);
9676  opcode(0xDD); /* LoadD DD /0 */
9677  ins_encode( Push_Reg_FPR(src0),
9678              FMul_ST_reg(src1),
9679              FAddP_reg_ST(src2) );
9680  ins_pipe( fpu_reg_reg_reg );
9681%}
9682
9683
9684// MACRO3 -- subDPR a mulDPR
9685instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9686  predicate( UseSSE<=1 );
9687  match(Set src2 (SubD (MulD src0 src1) src2));
9688  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9689            "DMUL   ST,$src1\n\t"
9690            "DSUBRp $src2,ST" %}
9691  ins_cost(250);
9692  ins_encode( Push_Reg_FPR(src0),
9693              FMul_ST_reg(src1),
9694              Opcode(0xDE), Opc_plus(0xE0,src2));
9695  ins_pipe( fpu_reg_reg_reg );
9696%}
9697
9698
9699instruct divDPR_reg(regDPR dst, regDPR src) %{
9700  predicate( UseSSE<=1 );
9701  match(Set dst (DivD dst src));
9702
9703  format %{ "FLD    $src\n\t"
9704            "FDIVp  $dst,ST" %}
9705  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9706  ins_cost(150);
9707  ins_encode( Push_Reg_DPR(src),
9708              OpcP, RegOpc(dst) );
9709  ins_pipe( fpu_reg_reg );
9710%}
9711
9712// Strict FP instruction biases argument before division then
9713// biases result, to avoid double rounding of subnormals.
9714//
9715// scale dividend by multiplying dividend by 2^(-15360)
9716// load divisor
9717// divide scaled dividend by divisor
9718// rescale quotient by 2^(15360)
9719//
9720instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9721  predicate (UseSSE<=1);
9722  match(Set dst (DivD dst src));
9723  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9724  ins_cost(01);
9725
9726  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9727            "DMULp  $dst,ST\n\t"
9728            "FLD    $src\n\t"
9729            "FDIVp  $dst,ST\n\t"
9730            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9731            "DMULp  $dst,ST\n\t" %}
9732  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9733  ins_encode( strictfp_bias1(dst),
9734              Push_Reg_DPR(src),
9735              OpcP, RegOpc(dst),
9736              strictfp_bias2(dst) );
9737  ins_pipe( fpu_reg_reg );
9738%}
9739
9740instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9741  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9742  match(Set dst (RoundDouble (DivD src1 src2)));
9743
9744  format %{ "FLD    $src1\n\t"
9745            "FDIV   ST,$src2\n\t"
9746            "FSTP_D $dst\t# D-round" %}
9747  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9748  ins_encode( Push_Reg_DPR(src1),
9749              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9750  ins_pipe( fpu_mem_reg_reg );
9751%}
9752
9753
9754instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9755  predicate(UseSSE<=1);
9756  match(Set dst (ModD dst src));
9757  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9758
9759  format %{ "DMOD   $dst,$src" %}
9760  ins_cost(250);
9761  ins_encode(Push_Reg_Mod_DPR(dst, src),
9762              emitModDPR(),
9763              Push_Result_Mod_DPR(src),
9764              Pop_Reg_DPR(dst));
9765  ins_pipe( pipe_slow );
9766%}
9767
9768instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9769  predicate(UseSSE>=2);
9770  match(Set dst (ModD src0 src1));
9771  effect(KILL rax, KILL cr);
9772
9773  format %{ "SUB    ESP,8\t # DMOD\n"
9774          "\tMOVSD  [ESP+0],$src1\n"
9775          "\tFLD_D  [ESP+0]\n"
9776          "\tMOVSD  [ESP+0],$src0\n"
9777          "\tFLD_D  [ESP+0]\n"
9778     "loop:\tFPREM\n"
9779          "\tFWAIT\n"
9780          "\tFNSTSW AX\n"
9781          "\tSAHF\n"
9782          "\tJP     loop\n"
9783          "\tFSTP_D [ESP+0]\n"
9784          "\tMOVSD  $dst,[ESP+0]\n"
9785          "\tADD    ESP,8\n"
9786          "\tFSTP   ST0\t # Restore FPU Stack"
9787    %}
9788  ins_cost(250);
9789  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9790  ins_pipe( pipe_slow );
9791%}
9792
9793instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9794  predicate (UseSSE<=1);
9795  match(Set dst (SinD src));
9796  ins_cost(1800);
9797  format %{ "DSIN   $dst" %}
9798  opcode(0xD9, 0xFE);
9799  ins_encode( OpcP, OpcS );
9800  ins_pipe( pipe_slow );
9801%}
9802
9803instruct sinD_reg(regD dst, eFlagsReg cr) %{
9804  predicate (UseSSE>=2);
9805  match(Set dst (SinD dst));
9806  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9807  ins_cost(1800);
9808  format %{ "DSIN   $dst" %}
9809  opcode(0xD9, 0xFE);
9810  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9811  ins_pipe( pipe_slow );
9812%}
9813
9814instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9815  predicate (UseSSE<=1);
9816  match(Set dst (CosD src));
9817  ins_cost(1800);
9818  format %{ "DCOS   $dst" %}
9819  opcode(0xD9, 0xFF);
9820  ins_encode( OpcP, OpcS );
9821  ins_pipe( pipe_slow );
9822%}
9823
9824instruct cosD_reg(regD dst, eFlagsReg cr) %{
9825  predicate (UseSSE>=2);
9826  match(Set dst (CosD dst));
9827  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9828  ins_cost(1800);
9829  format %{ "DCOS   $dst" %}
9830  opcode(0xD9, 0xFF);
9831  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9832  ins_pipe( pipe_slow );
9833%}
9834
9835instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9836  predicate (UseSSE<=1);
9837  match(Set dst(TanD src));
9838  format %{ "DTAN   $dst" %}
9839  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9840              Opcode(0xDD), Opcode(0xD8));   // fstp st
9841  ins_pipe( pipe_slow );
9842%}
9843
9844instruct tanD_reg(regD dst, eFlagsReg cr) %{
9845  predicate (UseSSE>=2);
9846  match(Set dst(TanD dst));
9847  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9848  format %{ "DTAN   $dst" %}
9849  ins_encode( Push_SrcD(dst),
9850              Opcode(0xD9), Opcode(0xF2),    // fptan
9851              Opcode(0xDD), Opcode(0xD8),   // fstp st
9852              Push_ResultD(dst) );
9853  ins_pipe( pipe_slow );
9854%}
9855
9856instruct atanDPR_reg(regDPR dst, regDPR src) %{
9857  predicate (UseSSE<=1);
9858  match(Set dst(AtanD dst src));
9859  format %{ "DATA   $dst,$src" %}
9860  opcode(0xD9, 0xF3);
9861  ins_encode( Push_Reg_DPR(src),
9862              OpcP, OpcS, RegOpc(dst) );
9863  ins_pipe( pipe_slow );
9864%}
9865
9866instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9867  predicate (UseSSE>=2);
9868  match(Set dst(AtanD dst src));
9869  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9870  format %{ "DATA   $dst,$src" %}
9871  opcode(0xD9, 0xF3);
9872  ins_encode( Push_SrcD(src),
9873              OpcP, OpcS, Push_ResultD(dst) );
9874  ins_pipe( pipe_slow );
9875%}
9876
9877instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9878  predicate (UseSSE<=1);
9879  match(Set dst (SqrtD src));
9880  format %{ "DSQRT  $dst,$src" %}
9881  opcode(0xFA, 0xD9);
9882  ins_encode( Push_Reg_DPR(src),
9883              OpcS, OpcP, Pop_Reg_DPR(dst) );
9884  ins_pipe( pipe_slow );
9885%}
9886
9887instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9888  predicate (UseSSE<=1);
9889  match(Set Y (PowD X Y));  // Raise X to the Yth power
9890  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9891  format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9892  ins_encode %{
9893    __ subptr(rsp, 8);
9894    __ fld_s($X$$reg - 1);
9895    __ fast_pow();
9896    __ addptr(rsp, 8);
9897  %}
9898  ins_pipe( pipe_slow );
9899%}
9900
9901instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9902  predicate (UseSSE>=2);
9903  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9904  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9905  format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9906  ins_encode %{
9907    __ subptr(rsp, 8);
9908    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9909    __ fld_d(Address(rsp, 0));
9910    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9911    __ fld_d(Address(rsp, 0));
9912    __ fast_pow();
9913    __ fstp_d(Address(rsp, 0));
9914    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9915    __ addptr(rsp, 8);
9916  %}
9917  ins_pipe( pipe_slow );
9918%}
9919
9920instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9921  predicate (UseSSE<=1);
9922  // The source Double operand on FPU stack
9923  match(Set dst (Log10D src));
9924  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9925  // fxch         ; swap ST(0) with ST(1)
9926  // fyl2x        ; compute log_10(2) * log_2(x)
9927  format %{ "FLDLG2 \t\t\t#Log10\n\t"
9928            "FXCH   \n\t"
9929            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9930         %}
9931  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9932              Opcode(0xD9), Opcode(0xC9),   // fxch
9933              Opcode(0xD9), Opcode(0xF1));  // fyl2x
9934
9935  ins_pipe( pipe_slow );
9936%}
9937
9938instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9939  predicate (UseSSE>=2);
9940  effect(KILL cr);
9941  match(Set dst (Log10D src));
9942  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9943  // fyl2x        ; compute log_10(2) * log_2(x)
9944  format %{ "FLDLG2 \t\t\t#Log10\n\t"
9945            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9946         %}
9947  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9948              Push_SrcD(src),
9949              Opcode(0xD9), Opcode(0xF1),   // fyl2x
9950              Push_ResultD(dst));
9951
9952  ins_pipe( pipe_slow );
9953%}
9954
9955//-------------Float Instructions-------------------------------
9956// Float Math
9957
9958// Code for float compare:
9959//     fcompp();
9960//     fwait(); fnstsw_ax();
9961//     sahf();
9962//     movl(dst, unordered_result);
9963//     jcc(Assembler::parity, exit);
9964//     movl(dst, less_result);
9965//     jcc(Assembler::below, exit);
9966//     movl(dst, equal_result);
9967//     jcc(Assembler::equal, exit);
9968//     movl(dst, greater_result);
9969//   exit:
9970
9971// P6 version of float compare, sets condition codes in EFLAGS
9972instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9973  predicate(VM_Version::supports_cmov() && UseSSE == 0);
9974  match(Set cr (CmpF src1 src2));
9975  effect(KILL rax);
9976  ins_cost(150);
9977  format %{ "FLD    $src1\n\t"
9978            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9979            "JNP    exit\n\t"
9980            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9981            "SAHF\n"
9982     "exit:\tNOP               // avoid branch to branch" %}
9983  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9984  ins_encode( Push_Reg_DPR(src1),
9985              OpcP, RegOpc(src2),
9986              cmpF_P6_fixup );
9987  ins_pipe( pipe_slow );
9988%}
9989
9990instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9991  predicate(VM_Version::supports_cmov() && UseSSE == 0);
9992  match(Set cr (CmpF src1 src2));
9993  ins_cost(100);
9994  format %{ "FLD    $src1\n\t"
9995            "FUCOMIP ST,$src2  // P6 instruction" %}
9996  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9997  ins_encode( Push_Reg_DPR(src1),
9998              OpcP, RegOpc(src2));
9999  ins_pipe( pipe_slow );
10000%}
10001
10002
10003// Compare & branch
10004instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10005  predicate(UseSSE == 0);
10006  match(Set cr (CmpF src1 src2));
10007  effect(KILL rax);
10008  ins_cost(200);
10009  format %{ "FLD    $src1\n\t"
10010            "FCOMp  $src2\n\t"
10011            "FNSTSW AX\n\t"
10012            "TEST   AX,0x400\n\t"
10013            "JZ,s   flags\n\t"
10014            "MOV    AH,1\t# unordered treat as LT\n"
10015    "flags:\tSAHF" %}
10016  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10017  ins_encode( Push_Reg_DPR(src1),
10018              OpcP, RegOpc(src2),
10019              fpu_flags);
10020  ins_pipe( pipe_slow );
10021%}
10022
10023// Compare vs zero into -1,0,1
10024instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10025  predicate(UseSSE == 0);
10026  match(Set dst (CmpF3 src1 zero));
10027  effect(KILL cr, KILL rax);
10028  ins_cost(280);
10029  format %{ "FTSTF  $dst,$src1" %}
10030  opcode(0xE4, 0xD9);
10031  ins_encode( Push_Reg_DPR(src1),
10032              OpcS, OpcP, PopFPU,
10033              CmpF_Result(dst));
10034  ins_pipe( pipe_slow );
10035%}
10036
10037// Compare into -1,0,1
10038instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10039  predicate(UseSSE == 0);
10040  match(Set dst (CmpF3 src1 src2));
10041  effect(KILL cr, KILL rax);
10042  ins_cost(300);
10043  format %{ "FCMPF  $dst,$src1,$src2" %}
10044  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10045  ins_encode( Push_Reg_DPR(src1),
10046              OpcP, RegOpc(src2),
10047              CmpF_Result(dst));
10048  ins_pipe( pipe_slow );
10049%}
10050
10051// float compare and set condition codes in EFLAGS by XMM regs
10052instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10053  predicate(UseSSE>=1);
10054  match(Set cr (CmpF src1 src2));
10055  ins_cost(145);
10056  format %{ "UCOMISS $src1,$src2\n\t"
10057            "JNP,s   exit\n\t"
10058            "PUSHF\t# saw NaN, set CF\n\t"
10059            "AND     [rsp], #0xffffff2b\n\t"
10060            "POPF\n"
10061    "exit:" %}
10062  ins_encode %{
10063    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10064    emit_cmpfp_fixup(_masm);
10065  %}
10066  ins_pipe( pipe_slow );
10067%}
10068
10069instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10070  predicate(UseSSE>=1);
10071  match(Set cr (CmpF src1 src2));
10072  ins_cost(100);
10073  format %{ "UCOMISS $src1,$src2" %}
10074  ins_encode %{
10075    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10076  %}
10077  ins_pipe( pipe_slow );
10078%}
10079
10080// float compare and set condition codes in EFLAGS by XMM regs
10081instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10082  predicate(UseSSE>=1);
10083  match(Set cr (CmpF src1 (LoadF src2)));
10084  ins_cost(165);
10085  format %{ "UCOMISS $src1,$src2\n\t"
10086            "JNP,s   exit\n\t"
10087            "PUSHF\t# saw NaN, set CF\n\t"
10088            "AND     [rsp], #0xffffff2b\n\t"
10089            "POPF\n"
10090    "exit:" %}
10091  ins_encode %{
10092    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10093    emit_cmpfp_fixup(_masm);
10094  %}
10095  ins_pipe( pipe_slow );
10096%}
10097
10098instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10099  predicate(UseSSE>=1);
10100  match(Set cr (CmpF src1 (LoadF src2)));
10101  ins_cost(100);
10102  format %{ "UCOMISS $src1,$src2" %}
10103  ins_encode %{
10104    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10105  %}
10106  ins_pipe( pipe_slow );
10107%}
10108
10109// Compare into -1,0,1 in XMM
10110instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10111  predicate(UseSSE>=1);
10112  match(Set dst (CmpF3 src1 src2));
10113  effect(KILL cr);
10114  ins_cost(255);
10115  format %{ "UCOMISS $src1, $src2\n\t"
10116            "MOV     $dst, #-1\n\t"
10117            "JP,s    done\n\t"
10118            "JB,s    done\n\t"
10119            "SETNE   $dst\n\t"
10120            "MOVZB   $dst, $dst\n"
10121    "done:" %}
10122  ins_encode %{
10123    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10124    emit_cmpfp3(_masm, $dst$$Register);
10125  %}
10126  ins_pipe( pipe_slow );
10127%}
10128
10129// Compare into -1,0,1 in XMM and memory
10130instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10131  predicate(UseSSE>=1);
10132  match(Set dst (CmpF3 src1 (LoadF src2)));
10133  effect(KILL cr);
10134  ins_cost(275);
10135  format %{ "UCOMISS $src1, $src2\n\t"
10136            "MOV     $dst, #-1\n\t"
10137            "JP,s    done\n\t"
10138            "JB,s    done\n\t"
10139            "SETNE   $dst\n\t"
10140            "MOVZB   $dst, $dst\n"
10141    "done:" %}
10142  ins_encode %{
10143    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10144    emit_cmpfp3(_masm, $dst$$Register);
10145  %}
10146  ins_pipe( pipe_slow );
10147%}
10148
10149// Spill to obtain 24-bit precision
10150instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10151  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10152  match(Set dst (SubF src1 src2));
10153
10154  format %{ "FSUB   $dst,$src1 - $src2" %}
10155  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10156  ins_encode( Push_Reg_FPR(src1),
10157              OpcReg_FPR(src2),
10158              Pop_Mem_FPR(dst) );
10159  ins_pipe( fpu_mem_reg_reg );
10160%}
10161//
10162// This instruction does not round to 24-bits
10163instruct subFPR_reg(regFPR dst, regFPR src) %{
10164  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10165  match(Set dst (SubF dst src));
10166
10167  format %{ "FSUB   $dst,$src" %}
10168  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10169  ins_encode( Push_Reg_FPR(src),
10170              OpcP, RegOpc(dst) );
10171  ins_pipe( fpu_reg_reg );
10172%}
10173
10174// Spill to obtain 24-bit precision
10175instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10176  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10177  match(Set dst (AddF src1 src2));
10178
10179  format %{ "FADD   $dst,$src1,$src2" %}
10180  opcode(0xD8, 0x0); /* D8 C0+i */
10181  ins_encode( Push_Reg_FPR(src2),
10182              OpcReg_FPR(src1),
10183              Pop_Mem_FPR(dst) );
10184  ins_pipe( fpu_mem_reg_reg );
10185%}
10186//
10187// This instruction does not round to 24-bits
10188instruct addFPR_reg(regFPR dst, regFPR src) %{
10189  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10190  match(Set dst (AddF dst src));
10191
10192  format %{ "FLD    $src\n\t"
10193            "FADDp  $dst,ST" %}
10194  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10195  ins_encode( Push_Reg_FPR(src),
10196              OpcP, RegOpc(dst) );
10197  ins_pipe( fpu_reg_reg );
10198%}
10199
10200instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10201  predicate(UseSSE==0);
10202  match(Set dst (AbsF src));
10203  ins_cost(100);
10204  format %{ "FABS" %}
10205  opcode(0xE1, 0xD9);
10206  ins_encode( OpcS, OpcP );
10207  ins_pipe( fpu_reg_reg );
10208%}
10209
10210instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10211  predicate(UseSSE==0);
10212  match(Set dst (NegF src));
10213  ins_cost(100);
10214  format %{ "FCHS" %}
10215  opcode(0xE0, 0xD9);
10216  ins_encode( OpcS, OpcP );
10217  ins_pipe( fpu_reg_reg );
10218%}
10219
10220// Cisc-alternate to addFPR_reg
10221// Spill to obtain 24-bit precision
10222instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10223  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10224  match(Set dst (AddF src1 (LoadF src2)));
10225
10226  format %{ "FLD    $src2\n\t"
10227            "FADD   ST,$src1\n\t"
10228            "FSTP_S $dst" %}
10229  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10230  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10231              OpcReg_FPR(src1),
10232              Pop_Mem_FPR(dst) );
10233  ins_pipe( fpu_mem_reg_mem );
10234%}
10235//
10236// Cisc-alternate to addFPR_reg
10237// This instruction does not round to 24-bits
10238instruct addFPR_reg_mem(regFPR dst, memory src) %{
10239  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10240  match(Set dst (AddF dst (LoadF src)));
10241
10242  format %{ "FADD   $dst,$src" %}
10243  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10244  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10245              OpcP, RegOpc(dst) );
10246  ins_pipe( fpu_reg_mem );
10247%}
10248
10249// // Following two instructions for _222_mpegaudio
10250// Spill to obtain 24-bit precision
10251instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10252  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10253  match(Set dst (AddF src1 src2));
10254
10255  format %{ "FADD   $dst,$src1,$src2" %}
10256  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10257  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10258              OpcReg_FPR(src2),
10259              Pop_Mem_FPR(dst) );
10260  ins_pipe( fpu_mem_reg_mem );
10261%}
10262
10263// Cisc-spill variant
10264// Spill to obtain 24-bit precision
10265instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10266  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10267  match(Set dst (AddF src1 (LoadF src2)));
10268
10269  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10270  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10271  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10272              set_instruction_start,
10273              OpcP, RMopc_Mem(secondary,src1),
10274              Pop_Mem_FPR(dst) );
10275  ins_pipe( fpu_mem_mem_mem );
10276%}
10277
10278// Spill to obtain 24-bit precision
10279instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10280  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10281  match(Set dst (AddF src1 src2));
10282
10283  format %{ "FADD   $dst,$src1,$src2" %}
10284  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10285  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10286              set_instruction_start,
10287              OpcP, RMopc_Mem(secondary,src1),
10288              Pop_Mem_FPR(dst) );
10289  ins_pipe( fpu_mem_mem_mem );
10290%}
10291
10292
10293// Spill to obtain 24-bit precision
10294instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10295  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10296  match(Set dst (AddF src con));
10297  format %{ "FLD    $src\n\t"
10298            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10299            "FSTP_S $dst"  %}
10300  ins_encode %{
10301    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10302    __ fadd_s($constantaddress($con));
10303    __ fstp_s(Address(rsp, $dst$$disp));
10304  %}
10305  ins_pipe(fpu_mem_reg_con);
10306%}
10307//
10308// This instruction does not round to 24-bits
10309instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10310  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10311  match(Set dst (AddF src con));
10312  format %{ "FLD    $src\n\t"
10313            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10314            "FSTP   $dst"  %}
10315  ins_encode %{
10316    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10317    __ fadd_s($constantaddress($con));
10318    __ fstp_d($dst$$reg);
10319  %}
10320  ins_pipe(fpu_reg_reg_con);
10321%}
10322
10323// Spill to obtain 24-bit precision
10324instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10325  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10326  match(Set dst (MulF src1 src2));
10327
10328  format %{ "FLD    $src1\n\t"
10329            "FMUL   $src2\n\t"
10330            "FSTP_S $dst"  %}
10331  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10332  ins_encode( Push_Reg_FPR(src1),
10333              OpcReg_FPR(src2),
10334              Pop_Mem_FPR(dst) );
10335  ins_pipe( fpu_mem_reg_reg );
10336%}
10337//
10338// This instruction does not round to 24-bits
10339instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10340  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10341  match(Set dst (MulF src1 src2));
10342
10343  format %{ "FLD    $src1\n\t"
10344            "FMUL   $src2\n\t"
10345            "FSTP_S $dst"  %}
10346  opcode(0xD8, 0x1); /* D8 C8+i */
10347  ins_encode( Push_Reg_FPR(src2),
10348              OpcReg_FPR(src1),
10349              Pop_Reg_FPR(dst) );
10350  ins_pipe( fpu_reg_reg_reg );
10351%}
10352
10353
10354// Spill to obtain 24-bit precision
10355// Cisc-alternate to reg-reg multiply
10356instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10357  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10358  match(Set dst (MulF src1 (LoadF src2)));
10359
10360  format %{ "FLD_S  $src2\n\t"
10361            "FMUL   $src1\n\t"
10362            "FSTP_S $dst"  %}
10363  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10364  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10365              OpcReg_FPR(src1),
10366              Pop_Mem_FPR(dst) );
10367  ins_pipe( fpu_mem_reg_mem );
10368%}
10369//
10370// This instruction does not round to 24-bits
10371// Cisc-alternate to reg-reg multiply
10372instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10373  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10374  match(Set dst (MulF src1 (LoadF src2)));
10375
10376  format %{ "FMUL   $dst,$src1,$src2" %}
10377  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10378  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10379              OpcReg_FPR(src1),
10380              Pop_Reg_FPR(dst) );
10381  ins_pipe( fpu_reg_reg_mem );
10382%}
10383
10384// Spill to obtain 24-bit precision
10385instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10386  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10387  match(Set dst (MulF src1 src2));
10388
10389  format %{ "FMUL   $dst,$src1,$src2" %}
10390  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10391  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10392              set_instruction_start,
10393              OpcP, RMopc_Mem(secondary,src1),
10394              Pop_Mem_FPR(dst) );
10395  ins_pipe( fpu_mem_mem_mem );
10396%}
10397
10398// Spill to obtain 24-bit precision
10399instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10400  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10401  match(Set dst (MulF src con));
10402
10403  format %{ "FLD    $src\n\t"
10404            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10405            "FSTP_S $dst"  %}
10406  ins_encode %{
10407    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10408    __ fmul_s($constantaddress($con));
10409    __ fstp_s(Address(rsp, $dst$$disp));
10410  %}
10411  ins_pipe(fpu_mem_reg_con);
10412%}
10413//
10414// This instruction does not round to 24-bits
10415instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10416  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10417  match(Set dst (MulF src con));
10418
10419  format %{ "FLD    $src\n\t"
10420            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10421            "FSTP   $dst"  %}
10422  ins_encode %{
10423    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10424    __ fmul_s($constantaddress($con));
10425    __ fstp_d($dst$$reg);
10426  %}
10427  ins_pipe(fpu_reg_reg_con);
10428%}
10429
10430
10431//
10432// MACRO1 -- subsume unshared load into mulFPR
10433// This instruction does not round to 24-bits
10434instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10435  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10436  match(Set dst (MulF (LoadF mem1) src));
10437
10438  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10439            "FMUL   ST,$src\n\t"
10440            "FSTP   $dst" %}
10441  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10442  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10443              OpcReg_FPR(src),
10444              Pop_Reg_FPR(dst) );
10445  ins_pipe( fpu_reg_reg_mem );
10446%}
10447//
10448// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10449// This instruction does not round to 24-bits
10450instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10451  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10452  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10453  ins_cost(95);
10454
10455  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10456            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10457            "FADD   ST,$src2\n\t"
10458            "FSTP   $dst" %}
10459  opcode(0xD9); /* LoadF D9 /0 */
10460  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10461              FMul_ST_reg(src1),
10462              FAdd_ST_reg(src2),
10463              Pop_Reg_FPR(dst) );
10464  ins_pipe( fpu_reg_mem_reg_reg );
10465%}
10466
10467// MACRO3 -- addFPR a mulFPR
10468// This instruction does not round to 24-bits.  It is a '2-address'
10469// instruction in that the result goes back to src2.  This eliminates
10470// a move from the macro; possibly the register allocator will have
10471// to add it back (and maybe not).
10472instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10473  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10474  match(Set src2 (AddF (MulF src0 src1) src2));
10475
10476  format %{ "FLD    $src0     ===MACRO3===\n\t"
10477            "FMUL   ST,$src1\n\t"
10478            "FADDP  $src2,ST" %}
10479  opcode(0xD9); /* LoadF D9 /0 */
10480  ins_encode( Push_Reg_FPR(src0),
10481              FMul_ST_reg(src1),
10482              FAddP_reg_ST(src2) );
10483  ins_pipe( fpu_reg_reg_reg );
10484%}
10485
10486// MACRO4 -- divFPR subFPR
10487// This instruction does not round to 24-bits
10488instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10489  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10490  match(Set dst (DivF (SubF src2 src1) src3));
10491
10492  format %{ "FLD    $src2   ===MACRO4===\n\t"
10493            "FSUB   ST,$src1\n\t"
10494            "FDIV   ST,$src3\n\t"
10495            "FSTP  $dst" %}
10496  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10497  ins_encode( Push_Reg_FPR(src2),
10498              subFPR_divFPR_encode(src1,src3),
10499              Pop_Reg_FPR(dst) );
10500  ins_pipe( fpu_reg_reg_reg_reg );
10501%}
10502
10503// Spill to obtain 24-bit precision
10504instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10505  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10506  match(Set dst (DivF src1 src2));
10507
10508  format %{ "FDIV   $dst,$src1,$src2" %}
10509  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10510  ins_encode( Push_Reg_FPR(src1),
10511              OpcReg_FPR(src2),
10512              Pop_Mem_FPR(dst) );
10513  ins_pipe( fpu_mem_reg_reg );
10514%}
10515//
10516// This instruction does not round to 24-bits
10517instruct divFPR_reg(regFPR dst, regFPR src) %{
10518  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10519  match(Set dst (DivF dst src));
10520
10521  format %{ "FDIV   $dst,$src" %}
10522  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10523  ins_encode( Push_Reg_FPR(src),
10524              OpcP, RegOpc(dst) );
10525  ins_pipe( fpu_reg_reg );
10526%}
10527
10528
10529// Spill to obtain 24-bit precision
10530instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10531  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10532  match(Set dst (ModF src1 src2));
10533  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10534
10535  format %{ "FMOD   $dst,$src1,$src2" %}
10536  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10537              emitModDPR(),
10538              Push_Result_Mod_DPR(src2),
10539              Pop_Mem_FPR(dst));
10540  ins_pipe( pipe_slow );
10541%}
10542//
10543// This instruction does not round to 24-bits
10544instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10545  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10546  match(Set dst (ModF dst src));
10547  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10548
10549  format %{ "FMOD   $dst,$src" %}
10550  ins_encode(Push_Reg_Mod_DPR(dst, src),
10551              emitModDPR(),
10552              Push_Result_Mod_DPR(src),
10553              Pop_Reg_FPR(dst));
10554  ins_pipe( pipe_slow );
10555%}
10556
10557instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10558  predicate(UseSSE>=1);
10559  match(Set dst (ModF src0 src1));
10560  effect(KILL rax, KILL cr);
10561  format %{ "SUB    ESP,4\t # FMOD\n"
10562          "\tMOVSS  [ESP+0],$src1\n"
10563          "\tFLD_S  [ESP+0]\n"
10564          "\tMOVSS  [ESP+0],$src0\n"
10565          "\tFLD_S  [ESP+0]\n"
10566     "loop:\tFPREM\n"
10567          "\tFWAIT\n"
10568          "\tFNSTSW AX\n"
10569          "\tSAHF\n"
10570          "\tJP     loop\n"
10571          "\tFSTP_S [ESP+0]\n"
10572          "\tMOVSS  $dst,[ESP+0]\n"
10573          "\tADD    ESP,4\n"
10574          "\tFSTP   ST0\t # Restore FPU Stack"
10575    %}
10576  ins_cost(250);
10577  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10578  ins_pipe( pipe_slow );
10579%}
10580
10581
10582//----------Arithmetic Conversion Instructions---------------------------------
10583// The conversions operations are all Alpha sorted.  Please keep it that way!
10584
10585instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10586  predicate(UseSSE==0);
10587  match(Set dst (RoundFloat src));
10588  ins_cost(125);
10589  format %{ "FST_S  $dst,$src\t# F-round" %}
10590  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10591  ins_pipe( fpu_mem_reg );
10592%}
10593
10594instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10595  predicate(UseSSE<=1);
10596  match(Set dst (RoundDouble src));
10597  ins_cost(125);
10598  format %{ "FST_D  $dst,$src\t# D-round" %}
10599  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10600  ins_pipe( fpu_mem_reg );
10601%}
10602
10603// Force rounding to 24-bit precision and 6-bit exponent
10604instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10605  predicate(UseSSE==0);
10606  match(Set dst (ConvD2F src));
10607  format %{ "FST_S  $dst,$src\t# F-round" %}
10608  expand %{
10609    roundFloat_mem_reg(dst,src);
10610  %}
10611%}
10612
10613// Force rounding to 24-bit precision and 6-bit exponent
10614instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10615  predicate(UseSSE==1);
10616  match(Set dst (ConvD2F src));
10617  effect( KILL cr );
10618  format %{ "SUB    ESP,4\n\t"
10619            "FST_S  [ESP],$src\t# F-round\n\t"
10620            "MOVSS  $dst,[ESP]\n\t"
10621            "ADD ESP,4" %}
10622  ins_encode %{
10623    __ subptr(rsp, 4);
10624    if ($src$$reg != FPR1L_enc) {
10625      __ fld_s($src$$reg-1);
10626      __ fstp_s(Address(rsp, 0));
10627    } else {
10628      __ fst_s(Address(rsp, 0));
10629    }
10630    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10631    __ addptr(rsp, 4);
10632  %}
10633  ins_pipe( pipe_slow );
10634%}
10635
10636// Force rounding double precision to single precision
10637instruct convD2F_reg(regF dst, regD src) %{
10638  predicate(UseSSE>=2);
10639  match(Set dst (ConvD2F src));
10640  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10641  ins_encode %{
10642    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10643  %}
10644  ins_pipe( pipe_slow );
10645%}
10646
10647instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10648  predicate(UseSSE==0);
10649  match(Set dst (ConvF2D src));
10650  format %{ "FST_S  $dst,$src\t# D-round" %}
10651  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10652  ins_pipe( fpu_reg_reg );
10653%}
10654
10655instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10656  predicate(UseSSE==1);
10657  match(Set dst (ConvF2D src));
10658  format %{ "FST_D  $dst,$src\t# D-round" %}
10659  expand %{
10660    roundDouble_mem_reg(dst,src);
10661  %}
10662%}
10663
10664instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10665  predicate(UseSSE==1);
10666  match(Set dst (ConvF2D src));
10667  effect( KILL cr );
10668  format %{ "SUB    ESP,4\n\t"
10669            "MOVSS  [ESP] $src\n\t"
10670            "FLD_S  [ESP]\n\t"
10671            "ADD    ESP,4\n\t"
10672            "FSTP   $dst\t# D-round" %}
10673  ins_encode %{
10674    __ subptr(rsp, 4);
10675    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10676    __ fld_s(Address(rsp, 0));
10677    __ addptr(rsp, 4);
10678    __ fstp_d($dst$$reg);
10679  %}
10680  ins_pipe( pipe_slow );
10681%}
10682
10683instruct convF2D_reg(regD dst, regF src) %{
10684  predicate(UseSSE>=2);
10685  match(Set dst (ConvF2D src));
10686  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10687  ins_encode %{
10688    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10689  %}
10690  ins_pipe( pipe_slow );
10691%}
10692
10693// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10694instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10695  predicate(UseSSE<=1);
10696  match(Set dst (ConvD2I src));
10697  effect( KILL tmp, KILL cr );
10698  format %{ "FLD    $src\t# Convert double to int \n\t"
10699            "FLDCW  trunc mode\n\t"
10700            "SUB    ESP,4\n\t"
10701            "FISTp  [ESP + #0]\n\t"
10702            "FLDCW  std/24-bit mode\n\t"
10703            "POP    EAX\n\t"
10704            "CMP    EAX,0x80000000\n\t"
10705            "JNE,s  fast\n\t"
10706            "FLD_D  $src\n\t"
10707            "CALL   d2i_wrapper\n"
10708      "fast:" %}
10709  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10710  ins_pipe( pipe_slow );
10711%}
10712
10713// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10714instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10715  predicate(UseSSE>=2);
10716  match(Set dst (ConvD2I src));
10717  effect( KILL tmp, KILL cr );
10718  format %{ "CVTTSD2SI $dst, $src\n\t"
10719            "CMP    $dst,0x80000000\n\t"
10720            "JNE,s  fast\n\t"
10721            "SUB    ESP, 8\n\t"
10722            "MOVSD  [ESP], $src\n\t"
10723            "FLD_D  [ESP]\n\t"
10724            "ADD    ESP, 8\n\t"
10725            "CALL   d2i_wrapper\n"
10726      "fast:" %}
10727  ins_encode %{
10728    Label fast;
10729    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10730    __ cmpl($dst$$Register, 0x80000000);
10731    __ jccb(Assembler::notEqual, fast);
10732    __ subptr(rsp, 8);
10733    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10734    __ fld_d(Address(rsp, 0));
10735    __ addptr(rsp, 8);
10736    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10737    __ bind(fast);
10738  %}
10739  ins_pipe( pipe_slow );
10740%}
10741
10742instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10743  predicate(UseSSE<=1);
10744  match(Set dst (ConvD2L src));
10745  effect( KILL cr );
10746  format %{ "FLD    $src\t# Convert double to long\n\t"
10747            "FLDCW  trunc mode\n\t"
10748            "SUB    ESP,8\n\t"
10749            "FISTp  [ESP + #0]\n\t"
10750            "FLDCW  std/24-bit mode\n\t"
10751            "POP    EAX\n\t"
10752            "POP    EDX\n\t"
10753            "CMP    EDX,0x80000000\n\t"
10754            "JNE,s  fast\n\t"
10755            "TEST   EAX,EAX\n\t"
10756            "JNE,s  fast\n\t"
10757            "FLD    $src\n\t"
10758            "CALL   d2l_wrapper\n"
10759      "fast:" %}
10760  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10761  ins_pipe( pipe_slow );
10762%}
10763
10764// XMM lacks a float/double->long conversion, so use the old FPU stack.
10765instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10766  predicate (UseSSE>=2);
10767  match(Set dst (ConvD2L src));
10768  effect( KILL cr );
10769  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10770            "MOVSD  [ESP],$src\n\t"
10771            "FLD_D  [ESP]\n\t"
10772            "FLDCW  trunc mode\n\t"
10773            "FISTp  [ESP + #0]\n\t"
10774            "FLDCW  std/24-bit mode\n\t"
10775            "POP    EAX\n\t"
10776            "POP    EDX\n\t"
10777            "CMP    EDX,0x80000000\n\t"
10778            "JNE,s  fast\n\t"
10779            "TEST   EAX,EAX\n\t"
10780            "JNE,s  fast\n\t"
10781            "SUB    ESP,8\n\t"
10782            "MOVSD  [ESP],$src\n\t"
10783            "FLD_D  [ESP]\n\t"
10784            "ADD    ESP,8\n\t"
10785            "CALL   d2l_wrapper\n"
10786      "fast:" %}
10787  ins_encode %{
10788    Label fast;
10789    __ subptr(rsp, 8);
10790    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10791    __ fld_d(Address(rsp, 0));
10792    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10793    __ fistp_d(Address(rsp, 0));
10794    // Restore the rounding mode, mask the exception
10795    if (Compile::current()->in_24_bit_fp_mode()) {
10796      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10797    } else {
10798      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10799    }
10800    // Load the converted long, adjust CPU stack
10801    __ pop(rax);
10802    __ pop(rdx);
10803    __ cmpl(rdx, 0x80000000);
10804    __ jccb(Assembler::notEqual, fast);
10805    __ testl(rax, rax);
10806    __ jccb(Assembler::notEqual, fast);
10807    __ subptr(rsp, 8);
10808    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10809    __ fld_d(Address(rsp, 0));
10810    __ addptr(rsp, 8);
10811    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10812    __ bind(fast);
10813  %}
10814  ins_pipe( pipe_slow );
10815%}
10816
10817// Convert a double to an int.  Java semantics require we do complex
10818// manglations in the corner cases.  So we set the rounding mode to
10819// 'zero', store the darned double down as an int, and reset the
10820// rounding mode to 'nearest'.  The hardware stores a flag value down
10821// if we would overflow or converted a NAN; we check for this and
10822// and go the slow path if needed.
10823instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10824  predicate(UseSSE==0);
10825  match(Set dst (ConvF2I src));
10826  effect( KILL tmp, KILL cr );
10827  format %{ "FLD    $src\t# Convert float to int \n\t"
10828            "FLDCW  trunc mode\n\t"
10829            "SUB    ESP,4\n\t"
10830            "FISTp  [ESP + #0]\n\t"
10831            "FLDCW  std/24-bit mode\n\t"
10832            "POP    EAX\n\t"
10833            "CMP    EAX,0x80000000\n\t"
10834            "JNE,s  fast\n\t"
10835            "FLD    $src\n\t"
10836            "CALL   d2i_wrapper\n"
10837      "fast:" %}
10838  // DPR2I_encoding works for FPR2I
10839  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10840  ins_pipe( pipe_slow );
10841%}
10842
10843// Convert a float in xmm to an int reg.
10844instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10845  predicate(UseSSE>=1);
10846  match(Set dst (ConvF2I src));
10847  effect( KILL tmp, KILL cr );
10848  format %{ "CVTTSS2SI $dst, $src\n\t"
10849            "CMP    $dst,0x80000000\n\t"
10850            "JNE,s  fast\n\t"
10851            "SUB    ESP, 4\n\t"
10852            "MOVSS  [ESP], $src\n\t"
10853            "FLD    [ESP]\n\t"
10854            "ADD    ESP, 4\n\t"
10855            "CALL   d2i_wrapper\n"
10856      "fast:" %}
10857  ins_encode %{
10858    Label fast;
10859    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10860    __ cmpl($dst$$Register, 0x80000000);
10861    __ jccb(Assembler::notEqual, fast);
10862    __ subptr(rsp, 4);
10863    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10864    __ fld_s(Address(rsp, 0));
10865    __ addptr(rsp, 4);
10866    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10867    __ bind(fast);
10868  %}
10869  ins_pipe( pipe_slow );
10870%}
10871
10872instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10873  predicate(UseSSE==0);
10874  match(Set dst (ConvF2L src));
10875  effect( KILL cr );
10876  format %{ "FLD    $src\t# Convert float to long\n\t"
10877            "FLDCW  trunc mode\n\t"
10878            "SUB    ESP,8\n\t"
10879            "FISTp  [ESP + #0]\n\t"
10880            "FLDCW  std/24-bit mode\n\t"
10881            "POP    EAX\n\t"
10882            "POP    EDX\n\t"
10883            "CMP    EDX,0x80000000\n\t"
10884            "JNE,s  fast\n\t"
10885            "TEST   EAX,EAX\n\t"
10886            "JNE,s  fast\n\t"
10887            "FLD    $src\n\t"
10888            "CALL   d2l_wrapper\n"
10889      "fast:" %}
10890  // DPR2L_encoding works for FPR2L
10891  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10892  ins_pipe( pipe_slow );
10893%}
10894
10895// XMM lacks a float/double->long conversion, so use the old FPU stack.
10896instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10897  predicate (UseSSE>=1);
10898  match(Set dst (ConvF2L src));
10899  effect( KILL cr );
10900  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10901            "MOVSS  [ESP],$src\n\t"
10902            "FLD_S  [ESP]\n\t"
10903            "FLDCW  trunc mode\n\t"
10904            "FISTp  [ESP + #0]\n\t"
10905            "FLDCW  std/24-bit mode\n\t"
10906            "POP    EAX\n\t"
10907            "POP    EDX\n\t"
10908            "CMP    EDX,0x80000000\n\t"
10909            "JNE,s  fast\n\t"
10910            "TEST   EAX,EAX\n\t"
10911            "JNE,s  fast\n\t"
10912            "SUB    ESP,4\t# Convert float to long\n\t"
10913            "MOVSS  [ESP],$src\n\t"
10914            "FLD_S  [ESP]\n\t"
10915            "ADD    ESP,4\n\t"
10916            "CALL   d2l_wrapper\n"
10917      "fast:" %}
10918  ins_encode %{
10919    Label fast;
10920    __ subptr(rsp, 8);
10921    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10922    __ fld_s(Address(rsp, 0));
10923    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10924    __ fistp_d(Address(rsp, 0));
10925    // Restore the rounding mode, mask the exception
10926    if (Compile::current()->in_24_bit_fp_mode()) {
10927      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10928    } else {
10929      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10930    }
10931    // Load the converted long, adjust CPU stack
10932    __ pop(rax);
10933    __ pop(rdx);
10934    __ cmpl(rdx, 0x80000000);
10935    __ jccb(Assembler::notEqual, fast);
10936    __ testl(rax, rax);
10937    __ jccb(Assembler::notEqual, fast);
10938    __ subptr(rsp, 4);
10939    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10940    __ fld_s(Address(rsp, 0));
10941    __ addptr(rsp, 4);
10942    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10943    __ bind(fast);
10944  %}
10945  ins_pipe( pipe_slow );
10946%}
10947
10948instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10949  predicate( UseSSE<=1 );
10950  match(Set dst (ConvI2D src));
10951  format %{ "FILD   $src\n\t"
10952            "FSTP   $dst" %}
10953  opcode(0xDB, 0x0);  /* DB /0 */
10954  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10955  ins_pipe( fpu_reg_mem );
10956%}
10957
10958instruct convI2D_reg(regD dst, rRegI src) %{
10959  predicate( UseSSE>=2 && !UseXmmI2D );
10960  match(Set dst (ConvI2D src));
10961  format %{ "CVTSI2SD $dst,$src" %}
10962  ins_encode %{
10963    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10964  %}
10965  ins_pipe( pipe_slow );
10966%}
10967
10968instruct convI2D_mem(regD dst, memory mem) %{
10969  predicate( UseSSE>=2 );
10970  match(Set dst (ConvI2D (LoadI mem)));
10971  format %{ "CVTSI2SD $dst,$mem" %}
10972  ins_encode %{
10973    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10974  %}
10975  ins_pipe( pipe_slow );
10976%}
10977
10978instruct convXI2D_reg(regD dst, rRegI src)
10979%{
10980  predicate( UseSSE>=2 && UseXmmI2D );
10981  match(Set dst (ConvI2D src));
10982
10983  format %{ "MOVD  $dst,$src\n\t"
10984            "CVTDQ2PD $dst,$dst\t# i2d" %}
10985  ins_encode %{
10986    __ movdl($dst$$XMMRegister, $src$$Register);
10987    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10988  %}
10989  ins_pipe(pipe_slow); // XXX
10990%}
10991
10992instruct convI2DPR_mem(regDPR dst, memory mem) %{
10993  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10994  match(Set dst (ConvI2D (LoadI mem)));
10995  format %{ "FILD   $mem\n\t"
10996            "FSTP   $dst" %}
10997  opcode(0xDB);      /* DB /0 */
10998  ins_encode( OpcP, RMopc_Mem(0x00,mem),
10999              Pop_Reg_DPR(dst));
11000  ins_pipe( fpu_reg_mem );
11001%}
11002
11003// Convert a byte to a float; no rounding step needed.
11004instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11005  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11006  match(Set dst (ConvI2F src));
11007  format %{ "FILD   $src\n\t"
11008            "FSTP   $dst" %}
11009
11010  opcode(0xDB, 0x0);  /* DB /0 */
11011  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11012  ins_pipe( fpu_reg_mem );
11013%}
11014
11015// In 24-bit mode, force exponent rounding by storing back out
11016instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11017  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11018  match(Set dst (ConvI2F src));
11019  ins_cost(200);
11020  format %{ "FILD   $src\n\t"
11021            "FSTP_S $dst" %}
11022  opcode(0xDB, 0x0);  /* DB /0 */
11023  ins_encode( Push_Mem_I(src),
11024              Pop_Mem_FPR(dst));
11025  ins_pipe( fpu_mem_mem );
11026%}
11027
11028// In 24-bit mode, force exponent rounding by storing back out
11029instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11030  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11031  match(Set dst (ConvI2F (LoadI mem)));
11032  ins_cost(200);
11033  format %{ "FILD   $mem\n\t"
11034            "FSTP_S $dst" %}
11035  opcode(0xDB);  /* DB /0 */
11036  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11037              Pop_Mem_FPR(dst));
11038  ins_pipe( fpu_mem_mem );
11039%}
11040
11041// This instruction does not round to 24-bits
11042instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11043  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11044  match(Set dst (ConvI2F src));
11045  format %{ "FILD   $src\n\t"
11046            "FSTP   $dst" %}
11047  opcode(0xDB, 0x0);  /* DB /0 */
11048  ins_encode( Push_Mem_I(src),
11049              Pop_Reg_FPR(dst));
11050  ins_pipe( fpu_reg_mem );
11051%}
11052
11053// This instruction does not round to 24-bits
11054instruct convI2FPR_mem(regFPR dst, memory mem) %{
11055  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11056  match(Set dst (ConvI2F (LoadI mem)));
11057  format %{ "FILD   $mem\n\t"
11058            "FSTP   $dst" %}
11059  opcode(0xDB);      /* DB /0 */
11060  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11061              Pop_Reg_FPR(dst));
11062  ins_pipe( fpu_reg_mem );
11063%}
11064
11065// Convert an int to a float in xmm; no rounding step needed.
11066instruct convI2F_reg(regF dst, rRegI src) %{
11067  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11068  match(Set dst (ConvI2F src));
11069  format %{ "CVTSI2SS $dst, $src" %}
11070  ins_encode %{
11071    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11072  %}
11073  ins_pipe( pipe_slow );
11074%}
11075
11076 instruct convXI2F_reg(regF dst, rRegI src)
11077%{
11078  predicate( UseSSE>=2 && UseXmmI2F );
11079  match(Set dst (ConvI2F src));
11080
11081  format %{ "MOVD  $dst,$src\n\t"
11082            "CVTDQ2PS $dst,$dst\t# i2f" %}
11083  ins_encode %{
11084    __ movdl($dst$$XMMRegister, $src$$Register);
11085    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11086  %}
11087  ins_pipe(pipe_slow); // XXX
11088%}
11089
11090instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11091  match(Set dst (ConvI2L src));
11092  effect(KILL cr);
11093  ins_cost(375);
11094  format %{ "MOV    $dst.lo,$src\n\t"
11095            "MOV    $dst.hi,$src\n\t"
11096            "SAR    $dst.hi,31" %}
11097  ins_encode(convert_int_long(dst,src));
11098  ins_pipe( ialu_reg_reg_long );
11099%}
11100
11101// Zero-extend convert int to long
11102instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11103  match(Set dst (AndL (ConvI2L src) mask) );
11104  effect( KILL flags );
11105  ins_cost(250);
11106  format %{ "MOV    $dst.lo,$src\n\t"
11107            "XOR    $dst.hi,$dst.hi" %}
11108  opcode(0x33); // XOR
11109  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11110  ins_pipe( ialu_reg_reg_long );
11111%}
11112
11113// Zero-extend long
11114instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11115  match(Set dst (AndL src mask) );
11116  effect( KILL flags );
11117  ins_cost(250);
11118  format %{ "MOV    $dst.lo,$src.lo\n\t"
11119            "XOR    $dst.hi,$dst.hi\n\t" %}
11120  opcode(0x33); // XOR
11121  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11122  ins_pipe( ialu_reg_reg_long );
11123%}
11124
11125instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11126  predicate (UseSSE<=1);
11127  match(Set dst (ConvL2D src));
11128  effect( KILL cr );
11129  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11130            "PUSH   $src.lo\n\t"
11131            "FILD   ST,[ESP + #0]\n\t"
11132            "ADD    ESP,8\n\t"
11133            "FSTP_D $dst\t# D-round" %}
11134  opcode(0xDF, 0x5);  /* DF /5 */
11135  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11136  ins_pipe( pipe_slow );
11137%}
11138
11139instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11140  predicate (UseSSE>=2);
11141  match(Set dst (ConvL2D src));
11142  effect( KILL cr );
11143  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11144            "PUSH   $src.lo\n\t"
11145            "FILD_D [ESP]\n\t"
11146            "FSTP_D [ESP]\n\t"
11147            "MOVSD  $dst,[ESP]\n\t"
11148            "ADD    ESP,8" %}
11149  opcode(0xDF, 0x5);  /* DF /5 */
11150  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11151  ins_pipe( pipe_slow );
11152%}
11153
11154instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11155  predicate (UseSSE>=1);
11156  match(Set dst (ConvL2F src));
11157  effect( KILL cr );
11158  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11159            "PUSH   $src.lo\n\t"
11160            "FILD_D [ESP]\n\t"
11161            "FSTP_S [ESP]\n\t"
11162            "MOVSS  $dst,[ESP]\n\t"
11163            "ADD    ESP,8" %}
11164  opcode(0xDF, 0x5);  /* DF /5 */
11165  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11166  ins_pipe( pipe_slow );
11167%}
11168
11169instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11170  match(Set dst (ConvL2F src));
11171  effect( KILL cr );
11172  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11173            "PUSH   $src.lo\n\t"
11174            "FILD   ST,[ESP + #0]\n\t"
11175            "ADD    ESP,8\n\t"
11176            "FSTP_S $dst\t# F-round" %}
11177  opcode(0xDF, 0x5);  /* DF /5 */
11178  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11179  ins_pipe( pipe_slow );
11180%}
11181
11182instruct convL2I_reg( rRegI dst, eRegL src ) %{
11183  match(Set dst (ConvL2I src));
11184  effect( DEF dst, USE src );
11185  format %{ "MOV    $dst,$src.lo" %}
11186  ins_encode(enc_CopyL_Lo(dst,src));
11187  ins_pipe( ialu_reg_reg );
11188%}
11189
11190instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11191  match(Set dst (MoveF2I src));
11192  effect( DEF dst, USE src );
11193  ins_cost(100);
11194  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11195  ins_encode %{
11196    __ movl($dst$$Register, Address(rsp, $src$$disp));
11197  %}
11198  ins_pipe( ialu_reg_mem );
11199%}
11200
11201instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11202  predicate(UseSSE==0);
11203  match(Set dst (MoveF2I src));
11204  effect( DEF dst, USE src );
11205
11206  ins_cost(125);
11207  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11208  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11209  ins_pipe( fpu_mem_reg );
11210%}
11211
11212instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11213  predicate(UseSSE>=1);
11214  match(Set dst (MoveF2I src));
11215  effect( DEF dst, USE src );
11216
11217  ins_cost(95);
11218  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11219  ins_encode %{
11220    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11221  %}
11222  ins_pipe( pipe_slow );
11223%}
11224
11225instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11226  predicate(UseSSE>=2);
11227  match(Set dst (MoveF2I src));
11228  effect( DEF dst, USE src );
11229  ins_cost(85);
11230  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11231  ins_encode %{
11232    __ movdl($dst$$Register, $src$$XMMRegister);
11233  %}
11234  ins_pipe( pipe_slow );
11235%}
11236
11237instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11238  match(Set dst (MoveI2F src));
11239  effect( DEF dst, USE src );
11240
11241  ins_cost(100);
11242  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11243  ins_encode %{
11244    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11245  %}
11246  ins_pipe( ialu_mem_reg );
11247%}
11248
11249
11250instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11251  predicate(UseSSE==0);
11252  match(Set dst (MoveI2F src));
11253  effect(DEF dst, USE src);
11254
11255  ins_cost(125);
11256  format %{ "FLD_S  $src\n\t"
11257            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11258  opcode(0xD9);               /* D9 /0, FLD m32real */
11259  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11260              Pop_Reg_FPR(dst) );
11261  ins_pipe( fpu_reg_mem );
11262%}
11263
11264instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11265  predicate(UseSSE>=1);
11266  match(Set dst (MoveI2F src));
11267  effect( DEF dst, USE src );
11268
11269  ins_cost(95);
11270  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11271  ins_encode %{
11272    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11273  %}
11274  ins_pipe( pipe_slow );
11275%}
11276
11277instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11278  predicate(UseSSE>=2);
11279  match(Set dst (MoveI2F src));
11280  effect( DEF dst, USE src );
11281
11282  ins_cost(85);
11283  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11284  ins_encode %{
11285    __ movdl($dst$$XMMRegister, $src$$Register);
11286  %}
11287  ins_pipe( pipe_slow );
11288%}
11289
11290instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11291  match(Set dst (MoveD2L src));
11292  effect(DEF dst, USE src);
11293
11294  ins_cost(250);
11295  format %{ "MOV    $dst.lo,$src\n\t"
11296            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11297  opcode(0x8B, 0x8B);
11298  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11299  ins_pipe( ialu_mem_long_reg );
11300%}
11301
11302instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11303  predicate(UseSSE<=1);
11304  match(Set dst (MoveD2L src));
11305  effect(DEF dst, USE src);
11306
11307  ins_cost(125);
11308  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11309  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11310  ins_pipe( fpu_mem_reg );
11311%}
11312
11313instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11314  predicate(UseSSE>=2);
11315  match(Set dst (MoveD2L src));
11316  effect(DEF dst, USE src);
11317  ins_cost(95);
11318  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11319  ins_encode %{
11320    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11321  %}
11322  ins_pipe( pipe_slow );
11323%}
11324
11325instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11326  predicate(UseSSE>=2);
11327  match(Set dst (MoveD2L src));
11328  effect(DEF dst, USE src, TEMP tmp);
11329  ins_cost(85);
11330  format %{ "MOVD   $dst.lo,$src\n\t"
11331            "PSHUFLW $tmp,$src,0x4E\n\t"
11332            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11333  ins_encode %{
11334    __ movdl($dst$$Register, $src$$XMMRegister);
11335    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11336    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11337  %}
11338  ins_pipe( pipe_slow );
11339%}
11340
11341instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11342  match(Set dst (MoveL2D src));
11343  effect(DEF dst, USE src);
11344
11345  ins_cost(200);
11346  format %{ "MOV    $dst,$src.lo\n\t"
11347            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11348  opcode(0x89, 0x89);
11349  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11350  ins_pipe( ialu_mem_long_reg );
11351%}
11352
11353
11354instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11355  predicate(UseSSE<=1);
11356  match(Set dst (MoveL2D src));
11357  effect(DEF dst, USE src);
11358  ins_cost(125);
11359
11360  format %{ "FLD_D  $src\n\t"
11361            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11362  opcode(0xDD);               /* DD /0, FLD m64real */
11363  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11364              Pop_Reg_DPR(dst) );
11365  ins_pipe( fpu_reg_mem );
11366%}
11367
11368
11369instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11370  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11371  match(Set dst (MoveL2D src));
11372  effect(DEF dst, USE src);
11373
11374  ins_cost(95);
11375  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11376  ins_encode %{
11377    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11378  %}
11379  ins_pipe( pipe_slow );
11380%}
11381
11382instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11383  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11384  match(Set dst (MoveL2D src));
11385  effect(DEF dst, USE src);
11386
11387  ins_cost(95);
11388  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11389  ins_encode %{
11390    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11391  %}
11392  ins_pipe( pipe_slow );
11393%}
11394
11395instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11396  predicate(UseSSE>=2);
11397  match(Set dst (MoveL2D src));
11398  effect(TEMP dst, USE src, TEMP tmp);
11399  ins_cost(85);
11400  format %{ "MOVD   $dst,$src.lo\n\t"
11401            "MOVD   $tmp,$src.hi\n\t"
11402            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11403  ins_encode %{
11404    __ movdl($dst$$XMMRegister, $src$$Register);
11405    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11406    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11407  %}
11408  ins_pipe( pipe_slow );
11409%}
11410
11411
11412// =======================================================================
11413// fast clearing of an array
11414instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11415  predicate(!UseFastStosb);
11416  match(Set dummy (ClearArray cnt base));
11417  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11418  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11419            "SHL    ECX,1\t# Convert doublewords to words\n\t"
11420            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11421  ins_encode %{
11422    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11423  %}
11424  ins_pipe( pipe_slow );
11425%}
11426
11427instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11428  predicate(UseFastStosb);
11429  match(Set dummy (ClearArray cnt base));
11430  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11431  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11432            "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11433            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11434  ins_encode %{
11435    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11436  %}
11437  ins_pipe( pipe_slow );
11438%}
11439
11440instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11441                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11442  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11443  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11444  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11445
11446  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11447  ins_encode %{
11448    __ string_compare($str1$$Register, $str2$$Register,
11449                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11450                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11451  %}
11452  ins_pipe( pipe_slow );
11453%}
11454
11455instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11456                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11457  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11458  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11459  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11460
11461  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11462  ins_encode %{
11463    __ string_compare($str1$$Register, $str2$$Register,
11464                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11465                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11466  %}
11467  ins_pipe( pipe_slow );
11468%}
11469
11470instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11471                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11472  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11473  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11474  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11475
11476  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11477  ins_encode %{
11478    __ string_compare($str1$$Register, $str2$$Register,
11479                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11480                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11481  %}
11482  ins_pipe( pipe_slow );
11483%}
11484
11485instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11486                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11487  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11488  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11489  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11490
11491  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11492  ins_encode %{
11493    __ string_compare($str2$$Register, $str1$$Register,
11494                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
11495                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11496  %}
11497  ins_pipe( pipe_slow );
11498%}
11499
11500// fast string equals
11501instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11502                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11503  match(Set result (StrEquals (Binary str1 str2) cnt));
11504  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11505
11506  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11507  ins_encode %{
11508    __ arrays_equals(false, $str1$$Register, $str2$$Register,
11509                     $cnt$$Register, $result$$Register, $tmp3$$Register,
11510                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11511  %} 
11512
11513  ins_pipe( pipe_slow );
11514%}
11515
11516// fast search of substring with known size.
11517instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11518                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11519  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11520  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11521  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11522
11523  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11524  ins_encode %{
11525    int icnt2 = (int)$int_cnt2$$constant;
11526    if (icnt2 >= 16) {
11527      // IndexOf for constant substrings with size >= 16 elements
11528      // which don't need to be loaded through stack.
11529      __ string_indexofC8($str1$$Register, $str2$$Register,
11530                          $cnt1$$Register, $cnt2$$Register,
11531                          icnt2, $result$$Register,
11532                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11533    } else {
11534      // Small strings are loaded through stack if they cross page boundary.
11535      __ string_indexof($str1$$Register, $str2$$Register,
11536                        $cnt1$$Register, $cnt2$$Register,
11537                        icnt2, $result$$Register,
11538                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11539    }
11540  %}
11541  ins_pipe( pipe_slow );
11542%}
11543
11544// fast search of substring with known size.
11545instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11546                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11547  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11548  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11549  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11550
11551  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11552  ins_encode %{
11553    int icnt2 = (int)$int_cnt2$$constant;
11554    if (icnt2 >= 8) {
11555      // IndexOf for constant substrings with size >= 8 elements
11556      // which don't need to be loaded through stack.
11557      __ string_indexofC8($str1$$Register, $str2$$Register,
11558                          $cnt1$$Register, $cnt2$$Register,
11559                          icnt2, $result$$Register,
11560                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11561    } else {
11562      // Small strings are loaded through stack if they cross page boundary.
11563      __ string_indexof($str1$$Register, $str2$$Register,
11564                        $cnt1$$Register, $cnt2$$Register,
11565                        icnt2, $result$$Register,
11566                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11567    }
11568  %}
11569  ins_pipe( pipe_slow );
11570%}
11571
11572// fast search of substring with known size.
11573instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11574                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11575  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11576  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11577  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11578
11579  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11580  ins_encode %{
11581    int icnt2 = (int)$int_cnt2$$constant;
11582    if (icnt2 >= 8) {
11583      // IndexOf for constant substrings with size >= 8 elements
11584      // which don't need to be loaded through stack.
11585      __ string_indexofC8($str1$$Register, $str2$$Register,
11586                          $cnt1$$Register, $cnt2$$Register,
11587                          icnt2, $result$$Register,
11588                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11589    } else {
11590      // Small strings are loaded through stack if they cross page boundary.
11591      __ string_indexof($str1$$Register, $str2$$Register,
11592                        $cnt1$$Register, $cnt2$$Register,
11593                        icnt2, $result$$Register,
11594                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11595    }
11596  %}
11597  ins_pipe( pipe_slow );
11598%}
11599
11600instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11601                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11602  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11603  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11604  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11605
11606  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11607  ins_encode %{
11608    __ string_indexof($str1$$Register, $str2$$Register,
11609                      $cnt1$$Register, $cnt2$$Register,
11610                      (-1), $result$$Register,
11611                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11612  %}
11613  ins_pipe( pipe_slow );
11614%}
11615
11616instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11617                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11618  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11619  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11620  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11621
11622  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11623  ins_encode %{
11624    __ string_indexof($str1$$Register, $str2$$Register,
11625                      $cnt1$$Register, $cnt2$$Register,
11626                      (-1), $result$$Register,
11627                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11628  %}
11629  ins_pipe( pipe_slow );
11630%}
11631
11632instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11633                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11634  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11635  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11636  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11637
11638  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11639  ins_encode %{
11640    __ string_indexof($str1$$Register, $str2$$Register,
11641                      $cnt1$$Register, $cnt2$$Register,
11642                      (-1), $result$$Register,
11643                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11644  %}
11645  ins_pipe( pipe_slow );
11646%}
11647
11648instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11649                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11650  predicate(UseSSE42Intrinsics);
11651  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11652  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11653  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11654  ins_encode %{
11655    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11656                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11657  %}
11658  ins_pipe( pipe_slow );
11659%}
11660
11661// fast array equals
11662instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11663                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11664%{
11665  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11666  match(Set result (AryEq ary1 ary2));
11667  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11668  //ins_cost(300);
11669
11670  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11671  ins_encode %{
11672    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11673                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11674                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11675  %}
11676  ins_pipe( pipe_slow );
11677%}
11678
11679instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11680                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11681%{
11682  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11683  match(Set result (AryEq ary1 ary2));
11684  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11685  //ins_cost(300);
11686
11687  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11688  ins_encode %{
11689    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11690                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11691                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11692  %}
11693  ins_pipe( pipe_slow );
11694%}
11695
11696instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11697                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11698%{
11699  match(Set result (HasNegatives ary1 len));
11700  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11701
11702  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11703  ins_encode %{
11704    __ has_negatives($ary1$$Register, $len$$Register,
11705                     $result$$Register, $tmp3$$Register,
11706                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11707  %}
11708  ins_pipe( pipe_slow );
11709%}
11710
11711// fast char[] to byte[] compression
11712instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11713                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11714  match(Set result (StrCompressedCopy src (Binary dst len)));
11715  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11716
11717  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11718  ins_encode %{
11719    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11720                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11721                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11722  %}
11723  ins_pipe( pipe_slow );
11724%}
11725
11726// fast byte[] to char[] inflation
11727instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11728                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11729  match(Set dummy (StrInflatedCopy src (Binary dst len)));
11730  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11731
11732  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11733  ins_encode %{
11734    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11735                          $tmp1$$XMMRegister, $tmp2$$Register);
11736  %}
11737  ins_pipe( pipe_slow );
11738%}
11739
11740// encode char[] to byte[] in ISO_8859_1
11741instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11742                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11743                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11744  match(Set result (EncodeISOArray src (Binary dst len)));
11745  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11746
11747  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11748  ins_encode %{
11749    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11750                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11751                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11752  %}
11753  ins_pipe( pipe_slow );
11754%}
11755
11756
11757//----------Control Flow Instructions------------------------------------------
11758// Signed compare Instructions
11759instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11760  match(Set cr (CmpI op1 op2));
11761  effect( DEF cr, USE op1, USE op2 );
11762  format %{ "CMP    $op1,$op2" %}
11763  opcode(0x3B);  /* Opcode 3B /r */
11764  ins_encode( OpcP, RegReg( op1, op2) );
11765  ins_pipe( ialu_cr_reg_reg );
11766%}
11767
11768instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11769  match(Set cr (CmpI op1 op2));
11770  effect( DEF cr, USE op1 );
11771  format %{ "CMP    $op1,$op2" %}
11772  opcode(0x81,0x07);  /* Opcode 81 /7 */
11773  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11774  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11775  ins_pipe( ialu_cr_reg_imm );
11776%}
11777
11778// Cisc-spilled version of cmpI_eReg
11779instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11780  match(Set cr (CmpI op1 (LoadI op2)));
11781
11782  format %{ "CMP    $op1,$op2" %}
11783  ins_cost(500);
11784  opcode(0x3B);  /* Opcode 3B /r */
11785  ins_encode( OpcP, RegMem( op1, op2) );
11786  ins_pipe( ialu_cr_reg_mem );
11787%}
11788
11789instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11790  match(Set cr (CmpI src zero));
11791  effect( DEF cr, USE src );
11792
11793  format %{ "TEST   $src,$src" %}
11794  opcode(0x85);
11795  ins_encode( OpcP, RegReg( src, src ) );
11796  ins_pipe( ialu_cr_reg_imm );
11797%}
11798
11799instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11800  match(Set cr (CmpI (AndI src con) zero));
11801
11802  format %{ "TEST   $src,$con" %}
11803  opcode(0xF7,0x00);
11804  ins_encode( OpcP, RegOpc(src), Con32(con) );
11805  ins_pipe( ialu_cr_reg_imm );
11806%}
11807
11808instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11809  match(Set cr (CmpI (AndI src mem) zero));
11810
11811  format %{ "TEST   $src,$mem" %}
11812  opcode(0x85);
11813  ins_encode( OpcP, RegMem( src, mem ) );
11814  ins_pipe( ialu_cr_reg_mem );
11815%}
11816
11817// Unsigned compare Instructions; really, same as signed except they
11818// produce an eFlagsRegU instead of eFlagsReg.
11819instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11820  match(Set cr (CmpU op1 op2));
11821
11822  format %{ "CMPu   $op1,$op2" %}
11823  opcode(0x3B);  /* Opcode 3B /r */
11824  ins_encode( OpcP, RegReg( op1, op2) );
11825  ins_pipe( ialu_cr_reg_reg );
11826%}
11827
11828instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11829  match(Set cr (CmpU op1 op2));
11830
11831  format %{ "CMPu   $op1,$op2" %}
11832  opcode(0x81,0x07);  /* Opcode 81 /7 */
11833  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11834  ins_pipe( ialu_cr_reg_imm );
11835%}
11836
11837// // Cisc-spilled version of cmpU_eReg
11838instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11839  match(Set cr (CmpU op1 (LoadI op2)));
11840
11841  format %{ "CMPu   $op1,$op2" %}
11842  ins_cost(500);
11843  opcode(0x3B);  /* Opcode 3B /r */
11844  ins_encode( OpcP, RegMem( op1, op2) );
11845  ins_pipe( ialu_cr_reg_mem );
11846%}
11847
11848// // Cisc-spilled version of cmpU_eReg
11849//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11850//  match(Set cr (CmpU (LoadI op1) op2));
11851//
11852//  format %{ "CMPu   $op1,$op2" %}
11853//  ins_cost(500);
11854//  opcode(0x39);  /* Opcode 39 /r */
11855//  ins_encode( OpcP, RegMem( op1, op2) );
11856//%}
11857
11858instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11859  match(Set cr (CmpU src zero));
11860
11861  format %{ "TESTu  $src,$src" %}
11862  opcode(0x85);
11863  ins_encode( OpcP, RegReg( src, src ) );
11864  ins_pipe( ialu_cr_reg_imm );
11865%}
11866
11867// Unsigned pointer compare Instructions
11868instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11869  match(Set cr (CmpP op1 op2));
11870
11871  format %{ "CMPu   $op1,$op2" %}
11872  opcode(0x3B);  /* Opcode 3B /r */
11873  ins_encode( OpcP, RegReg( op1, op2) );
11874  ins_pipe( ialu_cr_reg_reg );
11875%}
11876
11877instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11878  match(Set cr (CmpP op1 op2));
11879
11880  format %{ "CMPu   $op1,$op2" %}
11881  opcode(0x81,0x07);  /* Opcode 81 /7 */
11882  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11883  ins_pipe( ialu_cr_reg_imm );
11884%}
11885
11886// // Cisc-spilled version of cmpP_eReg
11887instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11888  match(Set cr (CmpP op1 (LoadP op2)));
11889
11890  format %{ "CMPu   $op1,$op2" %}
11891  ins_cost(500);
11892  opcode(0x3B);  /* Opcode 3B /r */
11893  ins_encode( OpcP, RegMem( op1, op2) );
11894  ins_pipe( ialu_cr_reg_mem );
11895%}
11896
11897// // Cisc-spilled version of cmpP_eReg
11898//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11899//  match(Set cr (CmpP (LoadP op1) op2));
11900//
11901//  format %{ "CMPu   $op1,$op2" %}
11902//  ins_cost(500);
11903//  opcode(0x39);  /* Opcode 39 /r */
11904//  ins_encode( OpcP, RegMem( op1, op2) );
11905//%}
11906
11907// Compare raw pointer (used in out-of-heap check).
11908// Only works because non-oop pointers must be raw pointers
11909// and raw pointers have no anti-dependencies.
11910instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11911  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11912  match(Set cr (CmpP op1 (LoadP op2)));
11913
11914  format %{ "CMPu   $op1,$op2" %}
11915  opcode(0x3B);  /* Opcode 3B /r */
11916  ins_encode( OpcP, RegMem( op1, op2) );
11917  ins_pipe( ialu_cr_reg_mem );
11918%}
11919
11920//
11921// This will generate a signed flags result. This should be ok
11922// since any compare to a zero should be eq/neq.
11923instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11924  match(Set cr (CmpP src zero));
11925
11926  format %{ "TEST   $src,$src" %}
11927  opcode(0x85);
11928  ins_encode( OpcP, RegReg( src, src ) );
11929  ins_pipe( ialu_cr_reg_imm );
11930%}
11931
11932// Cisc-spilled version of testP_reg
11933// This will generate a signed flags result. This should be ok
11934// since any compare to a zero should be eq/neq.
11935instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11936  match(Set cr (CmpP (LoadP op) zero));
11937
11938  format %{ "TEST   $op,0xFFFFFFFF" %}
11939  ins_cost(500);
11940  opcode(0xF7);               /* Opcode F7 /0 */
11941  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11942  ins_pipe( ialu_cr_reg_imm );
11943%}
11944
11945// Yanked all unsigned pointer compare operations.
11946// Pointer compares are done with CmpP which is already unsigned.
11947
11948//----------Max and Min--------------------------------------------------------
11949// Min Instructions
11950////
11951//   *** Min and Max using the conditional move are slower than the
11952//   *** branch version on a Pentium III.
11953// // Conditional move for min
11954//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11955//  effect( USE_DEF op2, USE op1, USE cr );
11956//  format %{ "CMOVlt $op2,$op1\t! min" %}
11957//  opcode(0x4C,0x0F);
11958//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11959//  ins_pipe( pipe_cmov_reg );
11960//%}
11961//
11962//// Min Register with Register (P6 version)
11963//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11964//  predicate(VM_Version::supports_cmov() );
11965//  match(Set op2 (MinI op1 op2));
11966//  ins_cost(200);
11967//  expand %{
11968//    eFlagsReg cr;
11969//    compI_eReg(cr,op1,op2);
11970//    cmovI_reg_lt(op2,op1,cr);
11971//  %}
11972//%}
11973
11974// Min Register with Register (generic version)
11975instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11976  match(Set dst (MinI dst src));
11977  effect(KILL flags);
11978  ins_cost(300);
11979
11980  format %{ "MIN    $dst,$src" %}
11981  opcode(0xCC);
11982  ins_encode( min_enc(dst,src) );
11983  ins_pipe( pipe_slow );
11984%}
11985
11986// Max Register with Register
11987//   *** Min and Max using the conditional move are slower than the
11988//   *** branch version on a Pentium III.
11989// // Conditional move for max
11990//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11991//  effect( USE_DEF op2, USE op1, USE cr );
11992//  format %{ "CMOVgt $op2,$op1\t! max" %}
11993//  opcode(0x4F,0x0F);
11994//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11995//  ins_pipe( pipe_cmov_reg );
11996//%}
11997//
11998// // Max Register with Register (P6 version)
11999//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12000//  predicate(VM_Version::supports_cmov() );
12001//  match(Set op2 (MaxI op1 op2));
12002//  ins_cost(200);
12003//  expand %{
12004//    eFlagsReg cr;
12005//    compI_eReg(cr,op1,op2);
12006//    cmovI_reg_gt(op2,op1,cr);
12007//  %}
12008//%}
12009
12010// Max Register with Register (generic version)
12011instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12012  match(Set dst (MaxI dst src));
12013  effect(KILL flags);
12014  ins_cost(300);
12015
12016  format %{ "MAX    $dst,$src" %}
12017  opcode(0xCC);
12018  ins_encode( max_enc(dst,src) );
12019  ins_pipe( pipe_slow );
12020%}
12021
12022// ============================================================================
12023// Counted Loop limit node which represents exact final iterator value.
12024// Note: the resulting value should fit into integer range since
12025// counted loops have limit check on overflow.
12026instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12027  match(Set limit (LoopLimit (Binary init limit) stride));
12028  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12029  ins_cost(300);
12030
12031  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12032  ins_encode %{
12033    int strd = (int)$stride$$constant;
12034    assert(strd != 1 && strd != -1, "sanity");
12035    int m1 = (strd > 0) ? 1 : -1;
12036    // Convert limit to long (EAX:EDX)
12037    __ cdql();
12038    // Convert init to long (init:tmp)
12039    __ movl($tmp$$Register, $init$$Register);
12040    __ sarl($tmp$$Register, 31);
12041    // $limit - $init
12042    __ subl($limit$$Register, $init$$Register);
12043    __ sbbl($limit_hi$$Register, $tmp$$Register);
12044    // + ($stride - 1)
12045    if (strd > 0) {
12046      __ addl($limit$$Register, (strd - 1));
12047      __ adcl($limit_hi$$Register, 0);
12048      __ movl($tmp$$Register, strd);
12049    } else {
12050      __ addl($limit$$Register, (strd + 1));
12051      __ adcl($limit_hi$$Register, -1);
12052      __ lneg($limit_hi$$Register, $limit$$Register);
12053      __ movl($tmp$$Register, -strd);
12054    }
12055    // signed devision: (EAX:EDX) / pos_stride
12056    __ idivl($tmp$$Register);
12057    if (strd < 0) {
12058      // restore sign
12059      __ negl($tmp$$Register);
12060    }
12061    // (EAX) * stride
12062    __ mull($tmp$$Register);
12063    // + init (ignore upper bits)
12064    __ addl($limit$$Register, $init$$Register);
12065  %}
12066  ins_pipe( pipe_slow );
12067%}
12068
12069// ============================================================================
12070// Branch Instructions
12071// Jump Table
12072instruct jumpXtnd(rRegI switch_val) %{
12073  match(Jump switch_val);
12074  ins_cost(350);
12075  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12076  ins_encode %{
12077    // Jump to Address(table_base + switch_reg)
12078    Address index(noreg, $switch_val$$Register, Address::times_1);
12079    __ jump(ArrayAddress($constantaddress, index));
12080  %}
12081  ins_pipe(pipe_jmp);
12082%}
12083
12084// Jump Direct - Label defines a relative address from JMP+1
12085instruct jmpDir(label labl) %{
12086  match(Goto);
12087  effect(USE labl);
12088
12089  ins_cost(300);
12090  format %{ "JMP    $labl" %}
12091  size(5);
12092  ins_encode %{
12093    Label* L = $labl$$label;
12094    __ jmp(*L, false); // Always long jump
12095  %}
12096  ins_pipe( pipe_jmp );
12097%}
12098
12099// Jump Direct Conditional - Label defines a relative address from Jcc+1
12100instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12101  match(If cop cr);
12102  effect(USE labl);
12103
12104  ins_cost(300);
12105  format %{ "J$cop    $labl" %}
12106  size(6);
12107  ins_encode %{
12108    Label* L = $labl$$label;
12109    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12110  %}
12111  ins_pipe( pipe_jcc );
12112%}
12113
12114// Jump Direct Conditional - Label defines a relative address from Jcc+1
12115instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12116  match(CountedLoopEnd cop cr);
12117  effect(USE labl);
12118
12119  ins_cost(300);
12120  format %{ "J$cop    $labl\t# Loop end" %}
12121  size(6);
12122  ins_encode %{
12123    Label* L = $labl$$label;
12124    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12125  %}
12126  ins_pipe( pipe_jcc );
12127%}
12128
12129// Jump Direct Conditional - Label defines a relative address from Jcc+1
12130instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12131  match(CountedLoopEnd cop cmp);
12132  effect(USE labl);
12133
12134  ins_cost(300);
12135  format %{ "J$cop,u  $labl\t# Loop end" %}
12136  size(6);
12137  ins_encode %{
12138    Label* L = $labl$$label;
12139    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12140  %}
12141  ins_pipe( pipe_jcc );
12142%}
12143
12144instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12145  match(CountedLoopEnd cop cmp);
12146  effect(USE labl);
12147
12148  ins_cost(200);
12149  format %{ "J$cop,u  $labl\t# Loop end" %}
12150  size(6);
12151  ins_encode %{
12152    Label* L = $labl$$label;
12153    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12154  %}
12155  ins_pipe( pipe_jcc );
12156%}
12157
12158// Jump Direct Conditional - using unsigned comparison
12159instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12160  match(If cop cmp);
12161  effect(USE labl);
12162
12163  ins_cost(300);
12164  format %{ "J$cop,u  $labl" %}
12165  size(6);
12166  ins_encode %{
12167    Label* L = $labl$$label;
12168    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12169  %}
12170  ins_pipe(pipe_jcc);
12171%}
12172
12173instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12174  match(If cop cmp);
12175  effect(USE labl);
12176
12177  ins_cost(200);
12178  format %{ "J$cop,u  $labl" %}
12179  size(6);
12180  ins_encode %{
12181    Label* L = $labl$$label;
12182    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12183  %}
12184  ins_pipe(pipe_jcc);
12185%}
12186
12187instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12188  match(If cop cmp);
12189  effect(USE labl);
12190
12191  ins_cost(200);
12192  format %{ $$template
12193    if ($cop$$cmpcode == Assembler::notEqual) {
12194      $$emit$$"JP,u   $labl\n\t"
12195      $$emit$$"J$cop,u   $labl"
12196    } else {
12197      $$emit$$"JP,u   done\n\t"
12198      $$emit$$"J$cop,u   $labl\n\t"
12199      $$emit$$"done:"
12200    }
12201  %}
12202  ins_encode %{
12203    Label* l = $labl$$label;
12204    if ($cop$$cmpcode == Assembler::notEqual) {
12205      __ jcc(Assembler::parity, *l, false);
12206      __ jcc(Assembler::notEqual, *l, false);
12207    } else if ($cop$$cmpcode == Assembler::equal) {
12208      Label done;
12209      __ jccb(Assembler::parity, done);
12210      __ jcc(Assembler::equal, *l, false);
12211      __ bind(done);
12212    } else {
12213       ShouldNotReachHere();
12214    }
12215  %}
12216  ins_pipe(pipe_jcc);
12217%}
12218
12219// ============================================================================
12220// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12221// array for an instance of the superklass.  Set a hidden internal cache on a
12222// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12223// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12224instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12225  match(Set result (PartialSubtypeCheck sub super));
12226  effect( KILL rcx, KILL cr );
12227
12228  ins_cost(1100);  // slightly larger than the next version
12229  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12230            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12231            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12232            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12233            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12234            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12235            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12236     "miss:\t" %}
12237
12238  opcode(0x1); // Force a XOR of EDI
12239  ins_encode( enc_PartialSubtypeCheck() );
12240  ins_pipe( pipe_slow );
12241%}
12242
12243instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12244  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12245  effect( KILL rcx, KILL result );
12246
12247  ins_cost(1000);
12248  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12249            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12250            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12251            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12252            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12253            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12254     "miss:\t" %}
12255
12256  opcode(0x0);  // No need to XOR EDI
12257  ins_encode( enc_PartialSubtypeCheck() );
12258  ins_pipe( pipe_slow );
12259%}
12260
12261// ============================================================================
12262// Branch Instructions -- short offset versions
12263//
12264// These instructions are used to replace jumps of a long offset (the default
12265// match) with jumps of a shorter offset.  These instructions are all tagged
12266// with the ins_short_branch attribute, which causes the ADLC to suppress the
12267// match rules in general matching.  Instead, the ADLC generates a conversion
12268// method in the MachNode which can be used to do in-place replacement of the
12269// long variant with the shorter variant.  The compiler will determine if a
12270// branch can be taken by the is_short_branch_offset() predicate in the machine
12271// specific code section of the file.
12272
12273// Jump Direct - Label defines a relative address from JMP+1
12274instruct jmpDir_short(label labl) %{
12275  match(Goto);
12276  effect(USE labl);
12277
12278  ins_cost(300);
12279  format %{ "JMP,s  $labl" %}
12280  size(2);
12281  ins_encode %{
12282    Label* L = $labl$$label;
12283    __ jmpb(*L);
12284  %}
12285  ins_pipe( pipe_jmp );
12286  ins_short_branch(1);
12287%}
12288
12289// Jump Direct Conditional - Label defines a relative address from Jcc+1
12290instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12291  match(If cop cr);
12292  effect(USE labl);
12293
12294  ins_cost(300);
12295  format %{ "J$cop,s  $labl" %}
12296  size(2);
12297  ins_encode %{
12298    Label* L = $labl$$label;
12299    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12300  %}
12301  ins_pipe( pipe_jcc );
12302  ins_short_branch(1);
12303%}
12304
12305// Jump Direct Conditional - Label defines a relative address from Jcc+1
12306instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12307  match(CountedLoopEnd cop cr);
12308  effect(USE labl);
12309
12310  ins_cost(300);
12311  format %{ "J$cop,s  $labl\t# Loop end" %}
12312  size(2);
12313  ins_encode %{
12314    Label* L = $labl$$label;
12315    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12316  %}
12317  ins_pipe( pipe_jcc );
12318  ins_short_branch(1);
12319%}
12320
12321// Jump Direct Conditional - Label defines a relative address from Jcc+1
12322instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12323  match(CountedLoopEnd cop cmp);
12324  effect(USE labl);
12325
12326  ins_cost(300);
12327  format %{ "J$cop,us $labl\t# Loop end" %}
12328  size(2);
12329  ins_encode %{
12330    Label* L = $labl$$label;
12331    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12332  %}
12333  ins_pipe( pipe_jcc );
12334  ins_short_branch(1);
12335%}
12336
12337instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12338  match(CountedLoopEnd cop cmp);
12339  effect(USE labl);
12340
12341  ins_cost(300);
12342  format %{ "J$cop,us $labl\t# Loop end" %}
12343  size(2);
12344  ins_encode %{
12345    Label* L = $labl$$label;
12346    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12347  %}
12348  ins_pipe( pipe_jcc );
12349  ins_short_branch(1);
12350%}
12351
12352// Jump Direct Conditional - using unsigned comparison
12353instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12354  match(If cop cmp);
12355  effect(USE labl);
12356
12357  ins_cost(300);
12358  format %{ "J$cop,us $labl" %}
12359  size(2);
12360  ins_encode %{
12361    Label* L = $labl$$label;
12362    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12363  %}
12364  ins_pipe( pipe_jcc );
12365  ins_short_branch(1);
12366%}
12367
12368instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12369  match(If cop cmp);
12370  effect(USE labl);
12371
12372  ins_cost(300);
12373  format %{ "J$cop,us $labl" %}
12374  size(2);
12375  ins_encode %{
12376    Label* L = $labl$$label;
12377    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12378  %}
12379  ins_pipe( pipe_jcc );
12380  ins_short_branch(1);
12381%}
12382
12383instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12384  match(If cop cmp);
12385  effect(USE labl);
12386
12387  ins_cost(300);
12388  format %{ $$template
12389    if ($cop$$cmpcode == Assembler::notEqual) {
12390      $$emit$$"JP,u,s   $labl\n\t"
12391      $$emit$$"J$cop,u,s   $labl"
12392    } else {
12393      $$emit$$"JP,u,s   done\n\t"
12394      $$emit$$"J$cop,u,s  $labl\n\t"
12395      $$emit$$"done:"
12396    }
12397  %}
12398  size(4);
12399  ins_encode %{
12400    Label* l = $labl$$label;
12401    if ($cop$$cmpcode == Assembler::notEqual) {
12402      __ jccb(Assembler::parity, *l);
12403      __ jccb(Assembler::notEqual, *l);
12404    } else if ($cop$$cmpcode == Assembler::equal) {
12405      Label done;
12406      __ jccb(Assembler::parity, done);
12407      __ jccb(Assembler::equal, *l);
12408      __ bind(done);
12409    } else {
12410       ShouldNotReachHere();
12411    }
12412  %}
12413  ins_pipe(pipe_jcc);
12414  ins_short_branch(1);
12415%}
12416
12417// ============================================================================
12418// Long Compare
12419//
12420// Currently we hold longs in 2 registers.  Comparing such values efficiently
12421// is tricky.  The flavor of compare used depends on whether we are testing
12422// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12423// The GE test is the negated LT test.  The LE test can be had by commuting
12424// the operands (yielding a GE test) and then negating; negate again for the
12425// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12426// NE test is negated from that.
12427
12428// Due to a shortcoming in the ADLC, it mixes up expressions like:
12429// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12430// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12431// are collapsed internally in the ADLC's dfa-gen code.  The match for
12432// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12433// foo match ends up with the wrong leaf.  One fix is to not match both
12434// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12435// both forms beat the trinary form of long-compare and both are very useful
12436// on Intel which has so few registers.
12437
12438// Manifest a CmpL result in an integer register.  Very painful.
12439// This is the test to avoid.
12440instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12441  match(Set dst (CmpL3 src1 src2));
12442  effect( KILL flags );
12443  ins_cost(1000);
12444  format %{ "XOR    $dst,$dst\n\t"
12445            "CMP    $src1.hi,$src2.hi\n\t"
12446            "JLT,s  m_one\n\t"
12447            "JGT,s  p_one\n\t"
12448            "CMP    $src1.lo,$src2.lo\n\t"
12449            "JB,s   m_one\n\t"
12450            "JEQ,s  done\n"
12451    "p_one:\tINC    $dst\n\t"
12452            "JMP,s  done\n"
12453    "m_one:\tDEC    $dst\n"
12454     "done:" %}
12455  ins_encode %{
12456    Label p_one, m_one, done;
12457    __ xorptr($dst$$Register, $dst$$Register);
12458    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12459    __ jccb(Assembler::less,    m_one);
12460    __ jccb(Assembler::greater, p_one);
12461    __ cmpl($src1$$Register, $src2$$Register);
12462    __ jccb(Assembler::below,   m_one);
12463    __ jccb(Assembler::equal,   done);
12464    __ bind(p_one);
12465    __ incrementl($dst$$Register);
12466    __ jmpb(done);
12467    __ bind(m_one);
12468    __ decrementl($dst$$Register);
12469    __ bind(done);
12470  %}
12471  ins_pipe( pipe_slow );
12472%}
12473
12474//======
12475// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12476// compares.  Can be used for LE or GT compares by reversing arguments.
12477// NOT GOOD FOR EQ/NE tests.
12478instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12479  match( Set flags (CmpL src zero ));
12480  ins_cost(100);
12481  format %{ "TEST   $src.hi,$src.hi" %}
12482  opcode(0x85);
12483  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12484  ins_pipe( ialu_cr_reg_reg );
12485%}
12486
12487// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12488// compares.  Can be used for LE or GT compares by reversing arguments.
12489// NOT GOOD FOR EQ/NE tests.
12490instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12491  match( Set flags (CmpL src1 src2 ));
12492  effect( TEMP tmp );
12493  ins_cost(300);
12494  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12495            "MOV    $tmp,$src1.hi\n\t"
12496            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12497  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12498  ins_pipe( ialu_cr_reg_reg );
12499%}
12500
12501// Long compares reg < zero/req OR reg >= zero/req.
12502// Just a wrapper for a normal branch, plus the predicate test.
12503instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12504  match(If cmp flags);
12505  effect(USE labl);
12506  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12507  expand %{
12508    jmpCon(cmp,flags,labl);    // JLT or JGE...
12509  %}
12510%}
12511
12512// Compare 2 longs and CMOVE longs.
12513instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12514  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12515  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12516  ins_cost(400);
12517  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12518            "CMOV$cmp $dst.hi,$src.hi" %}
12519  opcode(0x0F,0x40);
12520  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12521  ins_pipe( pipe_cmov_reg_long );
12522%}
12523
12524instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12525  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12526  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12527  ins_cost(500);
12528  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12529            "CMOV$cmp $dst.hi,$src.hi" %}
12530  opcode(0x0F,0x40);
12531  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12532  ins_pipe( pipe_cmov_reg_long );
12533%}
12534
12535// Compare 2 longs and CMOVE ints.
12536instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12537  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12538  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12539  ins_cost(200);
12540  format %{ "CMOV$cmp $dst,$src" %}
12541  opcode(0x0F,0x40);
12542  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12543  ins_pipe( pipe_cmov_reg );
12544%}
12545
12546instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12547  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12548  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12549  ins_cost(250);
12550  format %{ "CMOV$cmp $dst,$src" %}
12551  opcode(0x0F,0x40);
12552  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12553  ins_pipe( pipe_cmov_mem );
12554%}
12555
12556// Compare 2 longs and CMOVE ints.
12557instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12558  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12559  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12560  ins_cost(200);
12561  format %{ "CMOV$cmp $dst,$src" %}
12562  opcode(0x0F,0x40);
12563  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12564  ins_pipe( pipe_cmov_reg );
12565%}
12566
12567// Compare 2 longs and CMOVE doubles
12568instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12569  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12570  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12571  ins_cost(200);
12572  expand %{
12573    fcmovDPR_regS(cmp,flags,dst,src);
12574  %}
12575%}
12576
12577// Compare 2 longs and CMOVE doubles
12578instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12579  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12580  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12581  ins_cost(200);
12582  expand %{
12583    fcmovD_regS(cmp,flags,dst,src);
12584  %}
12585%}
12586
12587instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12588  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12589  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12590  ins_cost(200);
12591  expand %{
12592    fcmovFPR_regS(cmp,flags,dst,src);
12593  %}
12594%}
12595
12596instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12597  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12598  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12599  ins_cost(200);
12600  expand %{
12601    fcmovF_regS(cmp,flags,dst,src);
12602  %}
12603%}
12604
12605//======
12606// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12607instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12608  match( Set flags (CmpL src zero ));
12609  effect(TEMP tmp);
12610  ins_cost(200);
12611  format %{ "MOV    $tmp,$src.lo\n\t"
12612            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12613  ins_encode( long_cmp_flags0( src, tmp ) );
12614  ins_pipe( ialu_reg_reg_long );
12615%}
12616
12617// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12618instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12619  match( Set flags (CmpL src1 src2 ));
12620  ins_cost(200+300);
12621  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12622            "JNE,s  skip\n\t"
12623            "CMP    $src1.hi,$src2.hi\n\t"
12624     "skip:\t" %}
12625  ins_encode( long_cmp_flags1( src1, src2 ) );
12626  ins_pipe( ialu_cr_reg_reg );
12627%}
12628
12629// Long compare reg == zero/reg OR reg != zero/reg
12630// Just a wrapper for a normal branch, plus the predicate test.
12631instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12632  match(If cmp flags);
12633  effect(USE labl);
12634  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12635  expand %{
12636    jmpCon(cmp,flags,labl);    // JEQ or JNE...
12637  %}
12638%}
12639
12640// Compare 2 longs and CMOVE longs.
12641instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12642  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12643  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12644  ins_cost(400);
12645  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12646            "CMOV$cmp $dst.hi,$src.hi" %}
12647  opcode(0x0F,0x40);
12648  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12649  ins_pipe( pipe_cmov_reg_long );
12650%}
12651
12652instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12653  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12654  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12655  ins_cost(500);
12656  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12657            "CMOV$cmp $dst.hi,$src.hi" %}
12658  opcode(0x0F,0x40);
12659  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12660  ins_pipe( pipe_cmov_reg_long );
12661%}
12662
12663// Compare 2 longs and CMOVE ints.
12664instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12665  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12666  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12667  ins_cost(200);
12668  format %{ "CMOV$cmp $dst,$src" %}
12669  opcode(0x0F,0x40);
12670  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12671  ins_pipe( pipe_cmov_reg );
12672%}
12673
12674instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12675  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12676  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12677  ins_cost(250);
12678  format %{ "CMOV$cmp $dst,$src" %}
12679  opcode(0x0F,0x40);
12680  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12681  ins_pipe( pipe_cmov_mem );
12682%}
12683
12684// Compare 2 longs and CMOVE ints.
12685instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12686  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12687  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12688  ins_cost(200);
12689  format %{ "CMOV$cmp $dst,$src" %}
12690  opcode(0x0F,0x40);
12691  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12692  ins_pipe( pipe_cmov_reg );
12693%}
12694
12695// Compare 2 longs and CMOVE doubles
12696instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12697  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12698  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12699  ins_cost(200);
12700  expand %{
12701    fcmovDPR_regS(cmp,flags,dst,src);
12702  %}
12703%}
12704
12705// Compare 2 longs and CMOVE doubles
12706instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12707  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12708  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12709  ins_cost(200);
12710  expand %{
12711    fcmovD_regS(cmp,flags,dst,src);
12712  %}
12713%}
12714
12715instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12716  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12717  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12718  ins_cost(200);
12719  expand %{
12720    fcmovFPR_regS(cmp,flags,dst,src);
12721  %}
12722%}
12723
12724instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12725  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12726  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12727  ins_cost(200);
12728  expand %{
12729    fcmovF_regS(cmp,flags,dst,src);
12730  %}
12731%}
12732
12733//======
12734// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12735// Same as cmpL_reg_flags_LEGT except must negate src
12736instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12737  match( Set flags (CmpL src zero ));
12738  effect( TEMP tmp );
12739  ins_cost(300);
12740  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12741            "CMP    $tmp,$src.lo\n\t"
12742            "SBB    $tmp,$src.hi\n\t" %}
12743  ins_encode( long_cmp_flags3(src, tmp) );
12744  ins_pipe( ialu_reg_reg_long );
12745%}
12746
12747// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12748// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12749// requires a commuted test to get the same result.
12750instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12751  match( Set flags (CmpL src1 src2 ));
12752  effect( TEMP tmp );
12753  ins_cost(300);
12754  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12755            "MOV    $tmp,$src2.hi\n\t"
12756            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12757  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12758  ins_pipe( ialu_cr_reg_reg );
12759%}
12760
12761// Long compares reg < zero/req OR reg >= zero/req.
12762// Just a wrapper for a normal branch, plus the predicate test
12763instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12764  match(If cmp flags);
12765  effect(USE labl);
12766  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12767  ins_cost(300);
12768  expand %{
12769    jmpCon(cmp,flags,labl);    // JGT or JLE...
12770  %}
12771%}
12772
12773// Compare 2 longs and CMOVE longs.
12774instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12775  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12776  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12777  ins_cost(400);
12778  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12779            "CMOV$cmp $dst.hi,$src.hi" %}
12780  opcode(0x0F,0x40);
12781  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12782  ins_pipe( pipe_cmov_reg_long );
12783%}
12784
12785instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12786  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12787  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12788  ins_cost(500);
12789  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12790            "CMOV$cmp $dst.hi,$src.hi+4" %}
12791  opcode(0x0F,0x40);
12792  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12793  ins_pipe( pipe_cmov_reg_long );
12794%}
12795
12796// Compare 2 longs and CMOVE ints.
12797instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12798  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12799  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12800  ins_cost(200);
12801  format %{ "CMOV$cmp $dst,$src" %}
12802  opcode(0x0F,0x40);
12803  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12804  ins_pipe( pipe_cmov_reg );
12805%}
12806
12807instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12808  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12809  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12810  ins_cost(250);
12811  format %{ "CMOV$cmp $dst,$src" %}
12812  opcode(0x0F,0x40);
12813  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12814  ins_pipe( pipe_cmov_mem );
12815%}
12816
12817// Compare 2 longs and CMOVE ptrs.
12818instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12819  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12820  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12821  ins_cost(200);
12822  format %{ "CMOV$cmp $dst,$src" %}
12823  opcode(0x0F,0x40);
12824  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12825  ins_pipe( pipe_cmov_reg );
12826%}
12827
12828// Compare 2 longs and CMOVE doubles
12829instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12830  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12831  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12832  ins_cost(200);
12833  expand %{
12834    fcmovDPR_regS(cmp,flags,dst,src);
12835  %}
12836%}
12837
12838// Compare 2 longs and CMOVE doubles
12839instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12840  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12841  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12842  ins_cost(200);
12843  expand %{
12844    fcmovD_regS(cmp,flags,dst,src);
12845  %}
12846%}
12847
12848instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12849  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12850  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12851  ins_cost(200);
12852  expand %{
12853    fcmovFPR_regS(cmp,flags,dst,src);
12854  %}
12855%}
12856
12857
12858instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12859  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12860  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12861  ins_cost(200);
12862  expand %{
12863    fcmovF_regS(cmp,flags,dst,src);
12864  %}
12865%}
12866
12867
12868// ============================================================================
12869// Procedure Call/Return Instructions
12870// Call Java Static Instruction
12871// Note: If this code changes, the corresponding ret_addr_offset() and
12872//       compute_padding() functions will have to be adjusted.
12873instruct CallStaticJavaDirect(method meth) %{
12874  match(CallStaticJava);
12875  effect(USE meth);
12876
12877  ins_cost(300);
12878  format %{ "CALL,static " %}
12879  opcode(0xE8); /* E8 cd */
12880  ins_encode( pre_call_resets,
12881              Java_Static_Call( meth ),
12882              call_epilog,
12883              post_call_FPU );
12884  ins_pipe( pipe_slow );
12885  ins_alignment(4);
12886%}
12887
12888// Call Java Dynamic Instruction
12889// Note: If this code changes, the corresponding ret_addr_offset() and
12890//       compute_padding() functions will have to be adjusted.
12891instruct CallDynamicJavaDirect(method meth) %{
12892  match(CallDynamicJava);
12893  effect(USE meth);
12894
12895  ins_cost(300);
12896  format %{ "MOV    EAX,(oop)-1\n\t"
12897            "CALL,dynamic" %}
12898  opcode(0xE8); /* E8 cd */
12899  ins_encode( pre_call_resets,
12900              Java_Dynamic_Call( meth ),
12901              call_epilog,
12902              post_call_FPU );
12903  ins_pipe( pipe_slow );
12904  ins_alignment(4);
12905%}
12906
12907// Call Runtime Instruction
12908instruct CallRuntimeDirect(method meth) %{
12909  match(CallRuntime );
12910  effect(USE meth);
12911
12912  ins_cost(300);
12913  format %{ "CALL,runtime " %}
12914  opcode(0xE8); /* E8 cd */
12915  // Use FFREEs to clear entries in float stack
12916  ins_encode( pre_call_resets,
12917              FFree_Float_Stack_All,
12918              Java_To_Runtime( meth ),
12919              post_call_FPU );
12920  ins_pipe( pipe_slow );
12921%}
12922
12923// Call runtime without safepoint
12924instruct CallLeafDirect(method meth) %{
12925  match(CallLeaf);
12926  effect(USE meth);
12927
12928  ins_cost(300);
12929  format %{ "CALL_LEAF,runtime " %}
12930  opcode(0xE8); /* E8 cd */
12931  ins_encode( pre_call_resets,
12932              FFree_Float_Stack_All,
12933              Java_To_Runtime( meth ),
12934              Verify_FPU_For_Leaf, post_call_FPU );
12935  ins_pipe( pipe_slow );
12936%}
12937
12938instruct CallLeafNoFPDirect(method meth) %{
12939  match(CallLeafNoFP);
12940  effect(USE meth);
12941
12942  ins_cost(300);
12943  format %{ "CALL_LEAF_NOFP,runtime " %}
12944  opcode(0xE8); /* E8 cd */
12945  ins_encode(Java_To_Runtime(meth));
12946  ins_pipe( pipe_slow );
12947%}
12948
12949
12950// Return Instruction
12951// Remove the return address & jump to it.
12952instruct Ret() %{
12953  match(Return);
12954  format %{ "RET" %}
12955  opcode(0xC3);
12956  ins_encode(OpcP);
12957  ins_pipe( pipe_jmp );
12958%}
12959
12960// Tail Call; Jump from runtime stub to Java code.
12961// Also known as an 'interprocedural jump'.
12962// Target of jump will eventually return to caller.
12963// TailJump below removes the return address.
12964instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12965  match(TailCall jump_target method_oop );
12966  ins_cost(300);
12967  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12968  opcode(0xFF, 0x4);  /* Opcode FF /4 */
12969  ins_encode( OpcP, RegOpc(jump_target) );
12970  ins_pipe( pipe_jmp );
12971%}
12972
12973
12974// Tail Jump; remove the return address; jump to target.
12975// TailCall above leaves the return address around.
12976instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12977  match( TailJump jump_target ex_oop );
12978  ins_cost(300);
12979  format %{ "POP    EDX\t# pop return address into dummy\n\t"
12980            "JMP    $jump_target " %}
12981  opcode(0xFF, 0x4);  /* Opcode FF /4 */
12982  ins_encode( enc_pop_rdx,
12983              OpcP, RegOpc(jump_target) );
12984  ins_pipe( pipe_jmp );
12985%}
12986
12987// Create exception oop: created by stack-crawling runtime code.
12988// Created exception is now available to this handler, and is setup
12989// just prior to jumping to this handler.  No code emitted.
12990instruct CreateException( eAXRegP ex_oop )
12991%{
12992  match(Set ex_oop (CreateEx));
12993
12994  size(0);
12995  // use the following format syntax
12996  format %{ "# exception oop is in EAX; no code emitted" %}
12997  ins_encode();
12998  ins_pipe( empty );
12999%}
13000
13001
13002// Rethrow exception:
13003// The exception oop will come in the first argument position.
13004// Then JUMP (not call) to the rethrow stub code.
13005instruct RethrowException()
13006%{
13007  match(Rethrow);
13008
13009  // use the following format syntax
13010  format %{ "JMP    rethrow_stub" %}
13011  ins_encode(enc_rethrow);
13012  ins_pipe( pipe_jmp );
13013%}
13014
13015// inlined locking and unlocking
13016
13017instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13018  predicate(Compile::current()->use_rtm());
13019  match(Set cr (FastLock object box));
13020  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13021  ins_cost(300);
13022  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13023  ins_encode %{
13024    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13025                 $scr$$Register, $cx1$$Register, $cx2$$Register,
13026                 _counters, _rtm_counters, _stack_rtm_counters,
13027                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13028                 true, ra_->C->profile_rtm());
13029  %}
13030  ins_pipe(pipe_slow);
13031%}
13032
13033instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13034  predicate(!Compile::current()->use_rtm());
13035  match(Set cr (FastLock object box));
13036  effect(TEMP tmp, TEMP scr, USE_KILL box);
13037  ins_cost(300);
13038  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13039  ins_encode %{
13040    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13041                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13042  %}
13043  ins_pipe(pipe_slow);
13044%}
13045
13046instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13047  match(Set cr (FastUnlock object box));
13048  effect(TEMP tmp, USE_KILL box);
13049  ins_cost(300);
13050  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13051  ins_encode %{
13052    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13053  %}
13054  ins_pipe(pipe_slow);
13055%}
13056
13057
13058
13059// ============================================================================
13060// Safepoint Instruction
13061instruct safePoint_poll(eFlagsReg cr) %{
13062  match(SafePoint);
13063  effect(KILL cr);
13064
13065  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13066  // On SPARC that might be acceptable as we can generate the address with
13067  // just a sethi, saving an or.  By polling at offset 0 we can end up
13068  // putting additional pressure on the index-0 in the D$.  Because of
13069  // alignment (just like the situation at hand) the lower indices tend
13070  // to see more traffic.  It'd be better to change the polling address
13071  // to offset 0 of the last $line in the polling page.
13072
13073  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13074  ins_cost(125);
13075  size(6) ;
13076  ins_encode( Safepoint_Poll() );
13077  ins_pipe( ialu_reg_mem );
13078%}
13079
13080
13081// ============================================================================
13082// This name is KNOWN by the ADLC and cannot be changed.
13083// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13084// for this guy.
13085instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13086  match(Set dst (ThreadLocal));
13087  effect(DEF dst, KILL cr);
13088
13089  format %{ "MOV    $dst, Thread::current()" %}
13090  ins_encode %{
13091    Register dstReg = as_Register($dst$$reg);
13092    __ get_thread(dstReg);
13093  %}
13094  ins_pipe( ialu_reg_fat );
13095%}
13096
13097
13098
13099//----------PEEPHOLE RULES-----------------------------------------------------
13100// These must follow all instruction definitions as they use the names
13101// defined in the instructions definitions.
13102//
13103// peepmatch ( root_instr_name [preceding_instruction]* );
13104//
13105// peepconstraint %{
13106// (instruction_number.operand_name relational_op instruction_number.operand_name
13107//  [, ...] );
13108// // instruction numbers are zero-based using left to right order in peepmatch
13109//
13110// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13111// // provide an instruction_number.operand_name for each operand that appears
13112// // in the replacement instruction's match rule
13113//
13114// ---------VM FLAGS---------------------------------------------------------
13115//
13116// All peephole optimizations can be turned off using -XX:-OptoPeephole
13117//
13118// Each peephole rule is given an identifying number starting with zero and
13119// increasing by one in the order seen by the parser.  An individual peephole
13120// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13121// on the command-line.
13122//
13123// ---------CURRENT LIMITATIONS----------------------------------------------
13124//
13125// Only match adjacent instructions in same basic block
13126// Only equality constraints
13127// Only constraints between operands, not (0.dest_reg == EAX_enc)
13128// Only one replacement instruction
13129//
13130// ---------EXAMPLE----------------------------------------------------------
13131//
13132// // pertinent parts of existing instructions in architecture description
13133// instruct movI(rRegI dst, rRegI src) %{
13134//   match(Set dst (CopyI src));
13135// %}
13136//
13137// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13138//   match(Set dst (AddI dst src));
13139//   effect(KILL cr);
13140// %}
13141//
13142// // Change (inc mov) to lea
13143// peephole %{
13144//   // increment preceeded by register-register move
13145//   peepmatch ( incI_eReg movI );
13146//   // require that the destination register of the increment
13147//   // match the destination register of the move
13148//   peepconstraint ( 0.dst == 1.dst );
13149//   // construct a replacement instruction that sets
13150//   // the destination to ( move's source register + one )
13151//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13152// %}
13153//
13154// Implementation no longer uses movX instructions since
13155// machine-independent system no longer uses CopyX nodes.
13156//
13157// peephole %{
13158//   peepmatch ( incI_eReg movI );
13159//   peepconstraint ( 0.dst == 1.dst );
13160//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13161// %}
13162//
13163// peephole %{
13164//   peepmatch ( decI_eReg movI );
13165//   peepconstraint ( 0.dst == 1.dst );
13166//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13167// %}
13168//
13169// peephole %{
13170//   peepmatch ( addI_eReg_imm movI );
13171//   peepconstraint ( 0.dst == 1.dst );
13172//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13173// %}
13174//
13175// peephole %{
13176//   peepmatch ( addP_eReg_imm movP );
13177//   peepconstraint ( 0.dst == 1.dst );
13178//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13179// %}
13180
13181// // Change load of spilled value to only a spill
13182// instruct storeI(memory mem, rRegI src) %{
13183//   match(Set mem (StoreI mem src));
13184// %}
13185//
13186// instruct loadI(rRegI dst, memory mem) %{
13187//   match(Set dst (LoadI mem));
13188// %}
13189//
13190peephole %{
13191  peepmatch ( loadI storeI );
13192  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13193  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13194%}
13195
13196//----------SMARTSPILL RULES---------------------------------------------------
13197// These must follow all instruction definitions as they use the names
13198// defined in the instructions definitions.
13199