x86_32.ad revision 9801:80f8be586fae
1//
2// Copyright (c) 1997, 2015, Oracle and/or its affiliates. All rights reserved.
3// DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4//
5// This code is free software; you can redistribute it and/or modify it
6// under the terms of the GNU General Public License version 2 only, as
7// published by the Free Software Foundation.
8//
9// This code is distributed in the hope that it will be useful, but WITHOUT
10// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
11// FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12// version 2 for more details (a copy is included in the LICENSE file that
13// accompanied this code).
14//
15// You should have received a copy of the GNU General Public License version
16// 2 along with this work; if not, write to the Free Software Foundation,
17// Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
18//
19// Please contact Oracle, 500 Oracle Parkway, Redwood Shores, CA 94065 USA
20// or visit www.oracle.com if you need additional information or have any
21// questions.
22//
23//
24
25// X86 Architecture Description File
26
27//----------REGISTER DEFINITION BLOCK------------------------------------------
28// This information is used by the matcher and the register allocator to
29// describe individual registers and classes of registers within the target
30// archtecture.
31
32register %{
33//----------Architecture Description Register Definitions----------------------
34// General Registers
35// "reg_def"  name ( register save type, C convention save type,
36//                   ideal register type, encoding );
37// Register Save Types:
38//
39// NS  = No-Save:       The register allocator assumes that these registers
40//                      can be used without saving upon entry to the method, &
41//                      that they do not need to be saved at call sites.
42//
43// SOC = Save-On-Call:  The register allocator assumes that these registers
44//                      can be used without saving upon entry to the method,
45//                      but that they must be saved at call sites.
46//
47// SOE = Save-On-Entry: The register allocator assumes that these registers
48//                      must be saved before using them upon entry to the
49//                      method, but they do not need to be saved at call
50//                      sites.
51//
52// AS  = Always-Save:   The register allocator assumes that these registers
53//                      must be saved before using them upon entry to the
54//                      method, & that they must be saved at call sites.
55//
56// Ideal Register Type is used to determine how to save & restore a
57// register.  Op_RegI will get spilled with LoadI/StoreI, Op_RegP will get
58// spilled with LoadP/StoreP.  If the register supports both, use Op_RegI.
59//
60// The encoding number is the actual bit-pattern placed into the opcodes.
61
62// General Registers
63// Previously set EBX, ESI, and EDI as save-on-entry for java code
64// Turn off SOE in java-code due to frequent use of uncommon-traps.
65// Now that allocator is better, turn on ESI and EDI as SOE registers.
66
67reg_def EBX(SOC, SOE, Op_RegI, 3, rbx->as_VMReg());
68reg_def ECX(SOC, SOC, Op_RegI, 1, rcx->as_VMReg());
69reg_def ESI(SOC, SOE, Op_RegI, 6, rsi->as_VMReg());
70reg_def EDI(SOC, SOE, Op_RegI, 7, rdi->as_VMReg());
71// now that adapter frames are gone EBP is always saved and restored by the prolog/epilog code
72reg_def EBP(NS, SOE, Op_RegI, 5, rbp->as_VMReg());
73reg_def EDX(SOC, SOC, Op_RegI, 2, rdx->as_VMReg());
74reg_def EAX(SOC, SOC, Op_RegI, 0, rax->as_VMReg());
75reg_def ESP( NS,  NS, Op_RegI, 4, rsp->as_VMReg());
76
77// Float registers.  We treat TOS/FPR0 special.  It is invisible to the
78// allocator, and only shows up in the encodings.
79reg_def FPR0L( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
80reg_def FPR0H( SOC, SOC, Op_RegF, 0, VMRegImpl::Bad());
81// Ok so here's the trick FPR1 is really st(0) except in the midst
82// of emission of assembly for a machnode. During the emission the fpu stack
83// is pushed making FPR1 == st(1) temporarily. However at any safepoint
84// the stack will not have this element so FPR1 == st(0) from the
85// oopMap viewpoint. This same weirdness with numbering causes
86// instruction encoding to have to play games with the register
87// encode to correct for this 0/1 issue. See MachSpillCopyNode::implementation
88// where it does flt->flt moves to see an example
89//
90reg_def FPR1L( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg());
91reg_def FPR1H( SOC, SOC, Op_RegF, 1, as_FloatRegister(0)->as_VMReg()->next());
92reg_def FPR2L( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg());
93reg_def FPR2H( SOC, SOC, Op_RegF, 2, as_FloatRegister(1)->as_VMReg()->next());
94reg_def FPR3L( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg());
95reg_def FPR3H( SOC, SOC, Op_RegF, 3, as_FloatRegister(2)->as_VMReg()->next());
96reg_def FPR4L( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg());
97reg_def FPR4H( SOC, SOC, Op_RegF, 4, as_FloatRegister(3)->as_VMReg()->next());
98reg_def FPR5L( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg());
99reg_def FPR5H( SOC, SOC, Op_RegF, 5, as_FloatRegister(4)->as_VMReg()->next());
100reg_def FPR6L( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg());
101reg_def FPR6H( SOC, SOC, Op_RegF, 6, as_FloatRegister(5)->as_VMReg()->next());
102reg_def FPR7L( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg());
103reg_def FPR7H( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next());
104//
105// Empty fill registers, which are never used, but supply alignment to xmm regs
106//
107reg_def FILL0( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(2));
108reg_def FILL1( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(3));
109reg_def FILL2( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(4));
110reg_def FILL3( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(5));
111reg_def FILL4( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(6));
112reg_def FILL5( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(7));
113reg_def FILL6( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(8));
114reg_def FILL7( SOC, SOC, Op_RegF, 7, as_FloatRegister(6)->as_VMReg()->next(9));
115
116// Specify priority of register selection within phases of register
117// allocation.  Highest priority is first.  A useful heuristic is to
118// give registers a low priority when they are required by machine
119// instructions, like EAX and EDX.  Registers which are used as
120// pairs must fall on an even boundary (witness the FPR#L's in this list).
121// For the Intel integer registers, the equivalent Long pairs are
122// EDX:EAX, EBX:ECX, and EDI:EBP.
123alloc_class chunk0( ECX,   EBX,   EBP,   EDI,   EAX,   EDX,   ESI, ESP,
124                    FPR0L, FPR0H, FPR1L, FPR1H, FPR2L, FPR2H,
125                    FPR3L, FPR3H, FPR4L, FPR4H, FPR5L, FPR5H,
126                    FPR6L, FPR6H, FPR7L, FPR7H,
127                    FILL0, FILL1, FILL2, FILL3, FILL4, FILL5, FILL6, FILL7);
128
129
130//----------Architecture Description Register Classes--------------------------
131// Several register classes are automatically defined based upon information in
132// this architecture description.
133// 1) reg_class inline_cache_reg           ( /* as def'd in frame section */ )
134// 2) reg_class compiler_method_oop_reg    ( /* as def'd in frame section */ )
135// 2) reg_class interpreter_method_oop_reg ( /* as def'd in frame section */ )
136// 3) reg_class stack_slots( /* one chunk of stack-based "registers" */ )
137//
138// Class for no registers (empty set).
139reg_class no_reg();
140
141// Class for all registers
142reg_class any_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX, ESP);
143// Class for all registers (excluding EBP)
144reg_class any_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX, ESP);
145// Dynamic register class that selects at runtime between register classes
146// any_reg and any_no_ebp_reg (depending on the value of the flag PreserveFramePointer).
147// Equivalent to: return PreserveFramePointer ? any_no_ebp_reg : any_reg;
148reg_class_dynamic any_reg(any_reg_no_ebp, any_reg_with_ebp, %{ PreserveFramePointer %});
149
150// Class for general registers
151reg_class int_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, ECX, EBX);
152// Class for general registers (excluding EBP).
153// This register class can be used for implicit null checks on win95.
154// It is also safe for use by tailjumps (we don't want to allocate in ebp).
155// Used also if the PreserveFramePointer flag is true.
156reg_class int_reg_no_ebp(EAX, EDX, EDI, ESI, ECX, EBX);
157// Dynamic register class that selects between int_reg and int_reg_no_ebp.
158reg_class_dynamic int_reg(int_reg_no_ebp, int_reg_with_ebp, %{ PreserveFramePointer %});
159
160// Class of "X" registers
161reg_class int_x_reg(EBX, ECX, EDX, EAX);
162
163// Class of registers that can appear in an address with no offset.
164// EBP and ESP require an extra instruction byte for zero offset.
165// Used in fast-unlock
166reg_class p_reg(EDX, EDI, ESI, EBX);
167
168// Class for general registers excluding ECX
169reg_class ncx_reg_with_ebp(EAX, EDX, EBP, EDI, ESI, EBX);
170// Class for general registers excluding ECX (and EBP)
171reg_class ncx_reg_no_ebp(EAX, EDX, EDI, ESI, EBX);
172// Dynamic register class that selects between ncx_reg and ncx_reg_no_ebp.
173reg_class_dynamic ncx_reg(ncx_reg_no_ebp, ncx_reg_with_ebp, %{ PreserveFramePointer %});
174
175// Class for general registers excluding EAX
176reg_class nax_reg(EDX, EDI, ESI, ECX, EBX);
177
178// Class for general registers excluding EAX and EBX.
179reg_class nabx_reg_with_ebp(EDX, EDI, ESI, ECX, EBP);
180// Class for general registers excluding EAX and EBX (and EBP)
181reg_class nabx_reg_no_ebp(EDX, EDI, ESI, ECX);
182// Dynamic register class that selects between nabx_reg and nabx_reg_no_ebp.
183reg_class_dynamic nabx_reg(nabx_reg_no_ebp, nabx_reg_with_ebp, %{ PreserveFramePointer %});
184
185// Class of EAX (for multiply and divide operations)
186reg_class eax_reg(EAX);
187
188// Class of EBX (for atomic add)
189reg_class ebx_reg(EBX);
190
191// Class of ECX (for shift and JCXZ operations and cmpLTMask)
192reg_class ecx_reg(ECX);
193
194// Class of EDX (for multiply and divide operations)
195reg_class edx_reg(EDX);
196
197// Class of EDI (for synchronization)
198reg_class edi_reg(EDI);
199
200// Class of ESI (for synchronization)
201reg_class esi_reg(ESI);
202
203// Singleton class for stack pointer
204reg_class sp_reg(ESP);
205
206// Singleton class for instruction pointer
207// reg_class ip_reg(EIP);
208
209// Class of integer register pairs
210reg_class long_reg_with_ebp( EAX,EDX, ECX,EBX, EBP,EDI );
211// Class of integer register pairs (excluding EBP and EDI);
212reg_class long_reg_no_ebp( EAX,EDX, ECX,EBX );
213// Dynamic register class that selects between long_reg and long_reg_no_ebp.
214reg_class_dynamic long_reg(long_reg_no_ebp, long_reg_with_ebp, %{ PreserveFramePointer %});
215
216// Class of integer register pairs that aligns with calling convention
217reg_class eadx_reg( EAX,EDX );
218reg_class ebcx_reg( ECX,EBX );
219
220// Not AX or DX, used in divides
221reg_class nadx_reg_with_ebp(EBX, ECX, ESI, EDI, EBP);
222// Not AX or DX (and neither EBP), used in divides
223reg_class nadx_reg_no_ebp(EBX, ECX, ESI, EDI);
224// Dynamic register class that selects between nadx_reg and nadx_reg_no_ebp.
225reg_class_dynamic nadx_reg(nadx_reg_no_ebp, nadx_reg_with_ebp, %{ PreserveFramePointer %});
226
227// Floating point registers.  Notice FPR0 is not a choice.
228// FPR0 is not ever allocated; we use clever encodings to fake
229// a 2-address instructions out of Intels FP stack.
230reg_class fp_flt_reg( FPR1L,FPR2L,FPR3L,FPR4L,FPR5L,FPR6L,FPR7L );
231
232reg_class fp_dbl_reg( FPR1L,FPR1H, FPR2L,FPR2H, FPR3L,FPR3H,
233                      FPR4L,FPR4H, FPR5L,FPR5H, FPR6L,FPR6H,
234                      FPR7L,FPR7H );
235
236reg_class fp_flt_reg0( FPR1L );
237reg_class fp_dbl_reg0( FPR1L,FPR1H );
238reg_class fp_dbl_reg1( FPR2L,FPR2H );
239reg_class fp_dbl_notreg0( FPR2L,FPR2H, FPR3L,FPR3H, FPR4L,FPR4H,
240                          FPR5L,FPR5H, FPR6L,FPR6H, FPR7L,FPR7H );
241
242%}
243
244
245//----------SOURCE BLOCK-------------------------------------------------------
246// This is a block of C++ code which provides values, functions, and
247// definitions necessary in the rest of the architecture description
248source_hpp %{
249// Must be visible to the DFA in dfa_x86_32.cpp
250extern bool is_operand_hi32_zero(Node* n);
251%}
252
253source %{
254#define   RELOC_IMM32    Assembler::imm_operand
255#define   RELOC_DISP32   Assembler::disp32_operand
256
257#define __ _masm.
258
259// How to find the high register of a Long pair, given the low register
260#define   HIGH_FROM_LOW(x) ((x)+2)
261
262// These masks are used to provide 128-bit aligned bitmasks to the XMM
263// instructions, to allow sign-masking or sign-bit flipping.  They allow
264// fast versions of NegF/NegD and AbsF/AbsD.
265
266// Note: 'double' and 'long long' have 32-bits alignment on x86.
267static jlong* double_quadword(jlong *adr, jlong lo, jlong hi) {
268  // Use the expression (adr)&(~0xF) to provide 128-bits aligned address
269  // of 128-bits operands for SSE instructions.
270  jlong *operand = (jlong*)(((uintptr_t)adr)&((uintptr_t)(~0xF)));
271  // Store the value to a 128-bits operand.
272  operand[0] = lo;
273  operand[1] = hi;
274  return operand;
275}
276
277// Buffer for 128-bits masks used by SSE instructions.
278static jlong fp_signmask_pool[(4+1)*2]; // 4*128bits(data) + 128bits(alignment)
279
280// Static initialization during VM startup.
281static jlong *float_signmask_pool  = double_quadword(&fp_signmask_pool[1*2], CONST64(0x7FFFFFFF7FFFFFFF), CONST64(0x7FFFFFFF7FFFFFFF));
282static jlong *double_signmask_pool = double_quadword(&fp_signmask_pool[2*2], CONST64(0x7FFFFFFFFFFFFFFF), CONST64(0x7FFFFFFFFFFFFFFF));
283static jlong *float_signflip_pool  = double_quadword(&fp_signmask_pool[3*2], CONST64(0x8000000080000000), CONST64(0x8000000080000000));
284static jlong *double_signflip_pool = double_quadword(&fp_signmask_pool[4*2], CONST64(0x8000000000000000), CONST64(0x8000000000000000));
285
286// Offset hacking within calls.
287static int pre_call_resets_size() {
288  int size = 0;
289  Compile* C = Compile::current();
290  if (C->in_24_bit_fp_mode()) {
291    size += 6; // fldcw
292  }
293  if (C->max_vector_size() > 16) {
294    size += 3; // vzeroupper
295  }
296  return size;
297}
298
299// !!!!! Special hack to get all type of calls to specify the byte offset
300//       from the start of the call to the point where the return address
301//       will point.
302int MachCallStaticJavaNode::ret_addr_offset() {
303  return 5 + pre_call_resets_size();  // 5 bytes from start of call to where return address points
304}
305
306int MachCallDynamicJavaNode::ret_addr_offset() {
307  return 10 + pre_call_resets_size();  // 10 bytes from start of call to where return address points
308}
309
310static int sizeof_FFree_Float_Stack_All = -1;
311
312int MachCallRuntimeNode::ret_addr_offset() {
313  assert(sizeof_FFree_Float_Stack_All != -1, "must have been emitted already");
314  return sizeof_FFree_Float_Stack_All + 5 + pre_call_resets_size();
315}
316
317// Indicate if the safepoint node needs the polling page as an input.
318// Since x86 does have absolute addressing, it doesn't.
319bool SafePointNode::needs_polling_address_input() {
320  return false;
321}
322
323//
324// Compute padding required for nodes which need alignment
325//
326
327// The address of the call instruction needs to be 4-byte aligned to
328// ensure that it does not span a cache line so that it can be patched.
329int CallStaticJavaDirectNode::compute_padding(int current_offset) const {
330  current_offset += pre_call_resets_size();  // skip fldcw, if any
331  current_offset += 1;      // skip call opcode byte
332  return round_to(current_offset, alignment_required()) - current_offset;
333}
334
335// The address of the call instruction needs to be 4-byte aligned to
336// ensure that it does not span a cache line so that it can be patched.
337int CallDynamicJavaDirectNode::compute_padding(int current_offset) const {
338  current_offset += pre_call_resets_size();  // skip fldcw, if any
339  current_offset += 5;      // skip MOV instruction
340  current_offset += 1;      // skip call opcode byte
341  return round_to(current_offset, alignment_required()) - current_offset;
342}
343
344// EMIT_RM()
345void emit_rm(CodeBuffer &cbuf, int f1, int f2, int f3) {
346  unsigned char c = (unsigned char)((f1 << 6) | (f2 << 3) | f3);
347  cbuf.insts()->emit_int8(c);
348}
349
350// EMIT_CC()
351void emit_cc(CodeBuffer &cbuf, int f1, int f2) {
352  unsigned char c = (unsigned char)( f1 | f2 );
353  cbuf.insts()->emit_int8(c);
354}
355
356// EMIT_OPCODE()
357void emit_opcode(CodeBuffer &cbuf, int code) {
358  cbuf.insts()->emit_int8((unsigned char) code);
359}
360
361// EMIT_OPCODE() w/ relocation information
362void emit_opcode(CodeBuffer &cbuf, int code, relocInfo::relocType reloc, int offset = 0) {
363  cbuf.relocate(cbuf.insts_mark() + offset, reloc);
364  emit_opcode(cbuf, code);
365}
366
367// EMIT_D8()
368void emit_d8(CodeBuffer &cbuf, int d8) {
369  cbuf.insts()->emit_int8((unsigned char) d8);
370}
371
372// EMIT_D16()
373void emit_d16(CodeBuffer &cbuf, int d16) {
374  cbuf.insts()->emit_int16(d16);
375}
376
377// EMIT_D32()
378void emit_d32(CodeBuffer &cbuf, int d32) {
379  cbuf.insts()->emit_int32(d32);
380}
381
382// emit 32 bit value and construct relocation entry from relocInfo::relocType
383void emit_d32_reloc(CodeBuffer &cbuf, int d32, relocInfo::relocType reloc,
384        int format) {
385  cbuf.relocate(cbuf.insts_mark(), reloc, format);
386  cbuf.insts()->emit_int32(d32);
387}
388
389// emit 32 bit value and construct relocation entry from RelocationHolder
390void emit_d32_reloc(CodeBuffer &cbuf, int d32, RelocationHolder const& rspec,
391        int format) {
392#ifdef ASSERT
393  if (rspec.reloc()->type() == relocInfo::oop_type && d32 != 0 && d32 != (int)Universe::non_oop_word()) {
394    assert(cast_to_oop(d32)->is_oop() && (ScavengeRootsInCode || !cast_to_oop(d32)->is_scavengable()), "cannot embed scavengable oops in code");
395  }
396#endif
397  cbuf.relocate(cbuf.insts_mark(), rspec, format);
398  cbuf.insts()->emit_int32(d32);
399}
400
401// Access stack slot for load or store
402void store_to_stackslot(CodeBuffer &cbuf, int opcode, int rm_field, int disp) {
403  emit_opcode( cbuf, opcode );               // (e.g., FILD   [ESP+src])
404  if( -128 <= disp && disp <= 127 ) {
405    emit_rm( cbuf, 0x01, rm_field, ESP_enc );  // R/M byte
406    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
407    emit_d8 (cbuf, disp);     // Displacement  // R/M byte
408  } else {
409    emit_rm( cbuf, 0x02, rm_field, ESP_enc );  // R/M byte
410    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);    // SIB byte
411    emit_d32(cbuf, disp);     // Displacement  // R/M byte
412  }
413}
414
415   // rRegI ereg, memory mem) %{    // emit_reg_mem
416void encode_RegMem( CodeBuffer &cbuf, int reg_encoding, int base, int index, int scale, int displace, relocInfo::relocType disp_reloc ) {
417  // There is no index & no scale, use form without SIB byte
418  if ((index == 0x4) &&
419      (scale == 0) && (base != ESP_enc)) {
420    // If no displacement, mode is 0x0; unless base is [EBP]
421    if ( (displace == 0) && (base != EBP_enc) ) {
422      emit_rm(cbuf, 0x0, reg_encoding, base);
423    }
424    else {                    // If 8-bit displacement, mode 0x1
425      if ((displace >= -128) && (displace <= 127)
426          && (disp_reloc == relocInfo::none) ) {
427        emit_rm(cbuf, 0x1, reg_encoding, base);
428        emit_d8(cbuf, displace);
429      }
430      else {                  // If 32-bit displacement
431        if (base == -1) { // Special flag for absolute address
432          emit_rm(cbuf, 0x0, reg_encoding, 0x5);
433          // (manual lies; no SIB needed here)
434          if ( disp_reloc != relocInfo::none ) {
435            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
436          } else {
437            emit_d32      (cbuf, displace);
438          }
439        }
440        else {                // Normal base + offset
441          emit_rm(cbuf, 0x2, reg_encoding, base);
442          if ( disp_reloc != relocInfo::none ) {
443            emit_d32_reloc(cbuf, displace, disp_reloc, 1);
444          } else {
445            emit_d32      (cbuf, displace);
446          }
447        }
448      }
449    }
450  }
451  else {                      // Else, encode with the SIB byte
452    // If no displacement, mode is 0x0; unless base is [EBP]
453    if (displace == 0 && (base != EBP_enc)) {  // If no displacement
454      emit_rm(cbuf, 0x0, reg_encoding, 0x4);
455      emit_rm(cbuf, scale, index, base);
456    }
457    else {                    // If 8-bit displacement, mode 0x1
458      if ((displace >= -128) && (displace <= 127)
459          && (disp_reloc == relocInfo::none) ) {
460        emit_rm(cbuf, 0x1, reg_encoding, 0x4);
461        emit_rm(cbuf, scale, index, base);
462        emit_d8(cbuf, displace);
463      }
464      else {                  // If 32-bit displacement
465        if (base == 0x04 ) {
466          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
467          emit_rm(cbuf, scale, index, 0x04);
468        } else {
469          emit_rm(cbuf, 0x2, reg_encoding, 0x4);
470          emit_rm(cbuf, scale, index, base);
471        }
472        if ( disp_reloc != relocInfo::none ) {
473          emit_d32_reloc(cbuf, displace, disp_reloc, 1);
474        } else {
475          emit_d32      (cbuf, displace);
476        }
477      }
478    }
479  }
480}
481
482
483void encode_Copy( CodeBuffer &cbuf, int dst_encoding, int src_encoding ) {
484  if( dst_encoding == src_encoding ) {
485    // reg-reg copy, use an empty encoding
486  } else {
487    emit_opcode( cbuf, 0x8B );
488    emit_rm(cbuf, 0x3, dst_encoding, src_encoding );
489  }
490}
491
492void emit_cmpfp_fixup(MacroAssembler& _masm) {
493  Label exit;
494  __ jccb(Assembler::noParity, exit);
495  __ pushf();
496  //
497  // comiss/ucomiss instructions set ZF,PF,CF flags and
498  // zero OF,AF,SF for NaN values.
499  // Fixup flags by zeroing ZF,PF so that compare of NaN
500  // values returns 'less than' result (CF is set).
501  // Leave the rest of flags unchanged.
502  //
503  //    7 6 5 4 3 2 1 0
504  //   |S|Z|r|A|r|P|r|C|  (r - reserved bit)
505  //    0 0 1 0 1 0 1 1   (0x2B)
506  //
507  __ andl(Address(rsp, 0), 0xffffff2b);
508  __ popf();
509  __ bind(exit);
510}
511
512void emit_cmpfp3(MacroAssembler& _masm, Register dst) {
513  Label done;
514  __ movl(dst, -1);
515  __ jcc(Assembler::parity, done);
516  __ jcc(Assembler::below, done);
517  __ setb(Assembler::notEqual, dst);
518  __ movzbl(dst, dst);
519  __ bind(done);
520}
521
522
523//=============================================================================
524const RegMask& MachConstantBaseNode::_out_RegMask = RegMask::Empty;
525
526int Compile::ConstantTable::calculate_table_base_offset() const {
527  return 0;  // absolute addressing, no offset
528}
529
530bool MachConstantBaseNode::requires_postalloc_expand() const { return false; }
531void MachConstantBaseNode::postalloc_expand(GrowableArray <Node *> *nodes, PhaseRegAlloc *ra_) {
532  ShouldNotReachHere();
533}
534
535void MachConstantBaseNode::emit(CodeBuffer& cbuf, PhaseRegAlloc* ra_) const {
536  // Empty encoding
537}
538
539uint MachConstantBaseNode::size(PhaseRegAlloc* ra_) const {
540  return 0;
541}
542
543#ifndef PRODUCT
544void MachConstantBaseNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
545  st->print("# MachConstantBaseNode (empty encoding)");
546}
547#endif
548
549
550//=============================================================================
551#ifndef PRODUCT
552void MachPrologNode::format(PhaseRegAlloc* ra_, outputStream* st) const {
553  Compile* C = ra_->C;
554
555  int framesize = C->frame_size_in_bytes();
556  int bangsize = C->bang_size_in_bytes();
557  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
558  // Remove wordSize for return addr which is already pushed.
559  framesize -= wordSize;
560
561  if (C->need_stack_bang(bangsize)) {
562    framesize -= wordSize;
563    st->print("# stack bang (%d bytes)", bangsize);
564    st->print("\n\t");
565    st->print("PUSH   EBP\t# Save EBP");
566    if (PreserveFramePointer) {
567      st->print("\n\t");
568      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
569    }
570    if (framesize) {
571      st->print("\n\t");
572      st->print("SUB    ESP, #%d\t# Create frame",framesize);
573    }
574  } else {
575    st->print("SUB    ESP, #%d\t# Create frame",framesize);
576    st->print("\n\t");
577    framesize -= wordSize;
578    st->print("MOV    [ESP + #%d], EBP\t# Save EBP",framesize);
579    if (PreserveFramePointer) {
580      st->print("\n\t");
581      st->print("MOV    EBP, ESP\t# Save the caller's SP into EBP");
582      if (framesize > 0) {
583        st->print("\n\t");
584        st->print("ADD    EBP, #%d", framesize);
585      }
586    }
587  }
588
589  if (VerifyStackAtCalls) {
590    st->print("\n\t");
591    framesize -= wordSize;
592    st->print("MOV    [ESP + #%d], 0xBADB100D\t# Majik cookie for stack depth check",framesize);
593  }
594
595  if( C->in_24_bit_fp_mode() ) {
596    st->print("\n\t");
597    st->print("FLDCW  \t# load 24 bit fpu control word");
598  }
599  if (UseSSE >= 2 && VerifyFPU) {
600    st->print("\n\t");
601    st->print("# verify FPU stack (must be clean on entry)");
602  }
603
604#ifdef ASSERT
605  if (VerifyStackAtCalls) {
606    st->print("\n\t");
607    st->print("# stack alignment check");
608  }
609#endif
610  st->cr();
611}
612#endif
613
614
615void MachPrologNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
616  Compile* C = ra_->C;
617  MacroAssembler _masm(&cbuf);
618
619  int framesize = C->frame_size_in_bytes();
620  int bangsize = C->bang_size_in_bytes();
621
622  __ verified_entry(framesize, C->need_stack_bang(bangsize)?bangsize:0, C->in_24_bit_fp_mode());
623
624  C->set_frame_complete(cbuf.insts_size());
625
626  if (C->has_mach_constant_base_node()) {
627    // NOTE: We set the table base offset here because users might be
628    // emitted before MachConstantBaseNode.
629    Compile::ConstantTable& constant_table = C->constant_table();
630    constant_table.set_table_base_offset(constant_table.calculate_table_base_offset());
631  }
632}
633
634uint MachPrologNode::size(PhaseRegAlloc *ra_) const {
635  return MachNode::size(ra_); // too many variables; just compute it the hard way
636}
637
638int MachPrologNode::reloc() const {
639  return 0; // a large enough number
640}
641
642//=============================================================================
643#ifndef PRODUCT
644void MachEpilogNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
645  Compile *C = ra_->C;
646  int framesize = C->frame_size_in_bytes();
647  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
648  // Remove two words for return addr and rbp,
649  framesize -= 2*wordSize;
650
651  if (C->max_vector_size() > 16) {
652    st->print("VZEROUPPER");
653    st->cr(); st->print("\t");
654  }
655  if (C->in_24_bit_fp_mode()) {
656    st->print("FLDCW  standard control word");
657    st->cr(); st->print("\t");
658  }
659  if (framesize) {
660    st->print("ADD    ESP,%d\t# Destroy frame",framesize);
661    st->cr(); st->print("\t");
662  }
663  st->print_cr("POPL   EBP"); st->print("\t");
664  if (do_polling() && C->is_method_compilation()) {
665    st->print("TEST   PollPage,EAX\t! Poll Safepoint");
666    st->cr(); st->print("\t");
667  }
668}
669#endif
670
671void MachEpilogNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
672  Compile *C = ra_->C;
673  MacroAssembler _masm(&cbuf);
674
675  if (C->max_vector_size() > 16) {
676    // Clear upper bits of YMM registers when current compiled code uses
677    // wide vectors to avoid AVX <-> SSE transition penalty during call.
678    _masm.vzeroupper();
679  }
680  // If method set FPU control word, restore to standard control word
681  if (C->in_24_bit_fp_mode()) {
682    _masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
683  }
684
685  int framesize = C->frame_size_in_bytes();
686  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
687  // Remove two words for return addr and rbp,
688  framesize -= 2*wordSize;
689
690  // Note that VerifyStackAtCalls' Majik cookie does not change the frame size popped here
691
692  if (framesize >= 128) {
693    emit_opcode(cbuf, 0x81); // add  SP, #framesize
694    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
695    emit_d32(cbuf, framesize);
696  } else if (framesize) {
697    emit_opcode(cbuf, 0x83); // add  SP, #framesize
698    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
699    emit_d8(cbuf, framesize);
700  }
701
702  emit_opcode(cbuf, 0x58 | EBP_enc);
703
704  if (StackReservedPages > 0 && C->has_reserved_stack_access()) {
705    __ reserved_stack_check();
706  }
707
708  if (do_polling() && C->is_method_compilation()) {
709    cbuf.relocate(cbuf.insts_end(), relocInfo::poll_return_type, 0);
710    emit_opcode(cbuf,0x85);
711    emit_rm(cbuf, 0x0, EAX_enc, 0x5); // EAX
712    emit_d32(cbuf, (intptr_t)os::get_polling_page());
713  }
714}
715
716uint MachEpilogNode::size(PhaseRegAlloc *ra_) const {
717  Compile *C = ra_->C;
718  // If method set FPU control word, restore to standard control word
719  int size = C->in_24_bit_fp_mode() ? 6 : 0;
720  if (C->max_vector_size() > 16) size += 3; // vzeroupper
721  if (do_polling() && C->is_method_compilation()) size += 6;
722
723  int framesize = C->frame_size_in_bytes();
724  assert((framesize & (StackAlignmentInBytes-1)) == 0, "frame size not aligned");
725  // Remove two words for return addr and rbp,
726  framesize -= 2*wordSize;
727
728  size++; // popl rbp,
729
730  if (framesize >= 128) {
731    size += 6;
732  } else {
733    size += framesize ? 3 : 0;
734  }
735  size += 64; // added to support ReservedStackAccess
736  return size;
737}
738
739int MachEpilogNode::reloc() const {
740  return 0; // a large enough number
741}
742
743const Pipeline * MachEpilogNode::pipeline() const {
744  return MachNode::pipeline_class();
745}
746
747int MachEpilogNode::safepoint_offset() const { return 0; }
748
749//=============================================================================
750
751enum RC { rc_bad, rc_int, rc_float, rc_xmm, rc_stack };
752static enum RC rc_class( OptoReg::Name reg ) {
753
754  if( !OptoReg::is_valid(reg)  ) return rc_bad;
755  if (OptoReg::is_stack(reg)) return rc_stack;
756
757  VMReg r = OptoReg::as_VMReg(reg);
758  if (r->is_Register()) return rc_int;
759  if (r->is_FloatRegister()) {
760    assert(UseSSE < 2, "shouldn't be used in SSE2+ mode");
761    return rc_float;
762  }
763  assert(r->is_XMMRegister(), "must be");
764  return rc_xmm;
765}
766
767static int impl_helper( CodeBuffer *cbuf, bool do_size, bool is_load, int offset, int reg,
768                        int opcode, const char *op_str, int size, outputStream* st ) {
769  if( cbuf ) {
770    emit_opcode  (*cbuf, opcode );
771    encode_RegMem(*cbuf, Matcher::_regEncode[reg], ESP_enc, 0x4, 0, offset, relocInfo::none);
772#ifndef PRODUCT
773  } else if( !do_size ) {
774    if( size != 0 ) st->print("\n\t");
775    if( opcode == 0x8B || opcode == 0x89 ) { // MOV
776      if( is_load ) st->print("%s   %s,[ESP + #%d]",op_str,Matcher::regName[reg],offset);
777      else          st->print("%s   [ESP + #%d],%s",op_str,offset,Matcher::regName[reg]);
778    } else { // FLD, FST, PUSH, POP
779      st->print("%s [ESP + #%d]",op_str,offset);
780    }
781#endif
782  }
783  int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
784  return size+3+offset_size;
785}
786
787// Helper for XMM registers.  Extra opcode bits, limited syntax.
788static int impl_x_helper( CodeBuffer *cbuf, bool do_size, bool is_load,
789                         int offset, int reg_lo, int reg_hi, int size, outputStream* st ) {
790  int in_size_in_bits = Assembler::EVEX_32bit;
791  int evex_encoding = 0;
792  if (reg_lo+1 == reg_hi) {
793    in_size_in_bits = Assembler::EVEX_64bit;
794    evex_encoding = Assembler::VEX_W;
795  }
796  if (cbuf) {
797    MacroAssembler _masm(cbuf);
798    if (reg_lo+1 == reg_hi) { // double move?
799      if (is_load) {
800        __ movdbl(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
801      } else {
802        __ movdbl(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
803      }
804    } else {
805      if (is_load) {
806        __ movflt(as_XMMRegister(Matcher::_regEncode[reg_lo]), Address(rsp, offset));
807      } else {
808        __ movflt(Address(rsp, offset), as_XMMRegister(Matcher::_regEncode[reg_lo]));
809      }
810    }
811#ifndef PRODUCT
812  } else if (!do_size) {
813    if (size != 0) st->print("\n\t");
814    if (reg_lo+1 == reg_hi) { // double move?
815      if (is_load) st->print("%s %s,[ESP + #%d]",
816                              UseXmmLoadAndClearUpper ? "MOVSD " : "MOVLPD",
817                              Matcher::regName[reg_lo], offset);
818      else         st->print("MOVSD  [ESP + #%d],%s",
819                              offset, Matcher::regName[reg_lo]);
820    } else {
821      if (is_load) st->print("MOVSS  %s,[ESP + #%d]",
822                              Matcher::regName[reg_lo], offset);
823      else         st->print("MOVSS  [ESP + #%d],%s",
824                              offset, Matcher::regName[reg_lo]);
825    }
826#endif
827  }
828  bool is_single_byte = false;
829  if ((UseAVX > 2) && (offset != 0)) {
830    is_single_byte = Assembler::query_compressed_disp_byte(offset, true, 0, Assembler::EVEX_T1S, in_size_in_bits, evex_encoding);
831  }
832  int offset_size = 0;
833  if (UseAVX > 2 ) {
834    offset_size = (offset == 0) ? 0 : ((is_single_byte) ? 1 : 4);
835  } else {
836    offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
837  }
838  size += (UseAVX > 2) ? 2 : 0; // Need an additional two bytes for EVEX
839  // VEX_2bytes prefix is used if UseAVX > 0, so it takes the same 2 bytes as SIMD prefix.
840  return size+5+offset_size;
841}
842
843
844static int impl_movx_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
845                            int src_hi, int dst_hi, int size, outputStream* st ) {
846  if (cbuf) {
847    MacroAssembler _masm(cbuf);
848    if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
849      __ movdbl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
850                as_XMMRegister(Matcher::_regEncode[src_lo]));
851    } else {
852      __ movflt(as_XMMRegister(Matcher::_regEncode[dst_lo]),
853                as_XMMRegister(Matcher::_regEncode[src_lo]));
854    }
855#ifndef PRODUCT
856  } else if (!do_size) {
857    if (size != 0) st->print("\n\t");
858    if (UseXmmRegToRegMoveAll) {//Use movaps,movapd to move between xmm registers
859      if (src_lo+1 == src_hi && dst_lo+1 == dst_hi) { // double move?
860        st->print("MOVAPD %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
861      } else {
862        st->print("MOVAPS %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
863      }
864    } else {
865      if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double move?
866        st->print("MOVSD  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
867      } else {
868        st->print("MOVSS  %s,%s",Matcher::regName[dst_lo],Matcher::regName[src_lo]);
869      }
870    }
871#endif
872  }
873  // VEX_2bytes prefix is used if UseAVX > 0, and it takes the same 2 bytes as SIMD prefix.
874  // Only MOVAPS SSE prefix uses 1 byte.  EVEX uses an additional 2 bytes.
875  int sz = (UseAVX > 2) ? 6 : 4;
876  if (!(src_lo+1 == src_hi && dst_lo+1 == dst_hi) &&
877      UseXmmRegToRegMoveAll && (UseAVX == 0)) sz = 3;
878  return size + sz;
879}
880
881static int impl_movgpr2x_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
882                            int src_hi, int dst_hi, int size, outputStream* st ) {
883  // 32-bit
884  if (cbuf) {
885    MacroAssembler _masm(cbuf);
886    __ movdl(as_XMMRegister(Matcher::_regEncode[dst_lo]),
887             as_Register(Matcher::_regEncode[src_lo]));
888#ifndef PRODUCT
889  } else if (!do_size) {
890    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
891#endif
892  }
893  return (UseAVX> 2) ? 6 : 4;
894}
895
896
897static int impl_movx2gpr_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
898                                 int src_hi, int dst_hi, int size, outputStream* st ) {
899  // 32-bit
900  if (cbuf) {
901    MacroAssembler _masm(cbuf);
902    __ movdl(as_Register(Matcher::_regEncode[dst_lo]),
903             as_XMMRegister(Matcher::_regEncode[src_lo]));
904#ifndef PRODUCT
905  } else if (!do_size) {
906    st->print("movdl   %s, %s\t# spill", Matcher::regName[dst_lo], Matcher::regName[src_lo]);
907#endif
908  }
909  return (UseAVX> 2) ? 6 : 4;
910}
911
912static int impl_mov_helper( CodeBuffer *cbuf, bool do_size, int src, int dst, int size, outputStream* st ) {
913  if( cbuf ) {
914    emit_opcode(*cbuf, 0x8B );
915    emit_rm    (*cbuf, 0x3, Matcher::_regEncode[dst], Matcher::_regEncode[src] );
916#ifndef PRODUCT
917  } else if( !do_size ) {
918    if( size != 0 ) st->print("\n\t");
919    st->print("MOV    %s,%s",Matcher::regName[dst],Matcher::regName[src]);
920#endif
921  }
922  return size+2;
923}
924
925static int impl_fp_store_helper( CodeBuffer *cbuf, bool do_size, int src_lo, int src_hi, int dst_lo, int dst_hi,
926                                 int offset, int size, outputStream* st ) {
927  if( src_lo != FPR1L_num ) {      // Move value to top of FP stack, if not already there
928    if( cbuf ) {
929      emit_opcode( *cbuf, 0xD9 );  // FLD (i.e., push it)
930      emit_d8( *cbuf, 0xC0-1+Matcher::_regEncode[src_lo] );
931#ifndef PRODUCT
932    } else if( !do_size ) {
933      if( size != 0 ) st->print("\n\t");
934      st->print("FLD    %s",Matcher::regName[src_lo]);
935#endif
936    }
937    size += 2;
938  }
939
940  int st_op = (src_lo != FPR1L_num) ? EBX_num /*store & pop*/ : EDX_num /*store no pop*/;
941  const char *op_str;
942  int op;
943  if( src_lo+1 == src_hi && dst_lo+1 == dst_hi ) { // double store?
944    op_str = (src_lo != FPR1L_num) ? "FSTP_D" : "FST_D ";
945    op = 0xDD;
946  } else {                   // 32-bit store
947    op_str = (src_lo != FPR1L_num) ? "FSTP_S" : "FST_S ";
948    op = 0xD9;
949    assert( !OptoReg::is_valid(src_hi) && !OptoReg::is_valid(dst_hi), "no non-adjacent float-stores" );
950  }
951
952  return impl_helper(cbuf,do_size,false,offset,st_op,op,op_str,size, st);
953}
954
955// Next two methods are shared by 32- and 64-bit VM. They are defined in x86.ad.
956static int vec_mov_helper(CodeBuffer *cbuf, bool do_size, int src_lo, int dst_lo,
957                          int src_hi, int dst_hi, uint ireg, outputStream* st);
958
959static int vec_spill_helper(CodeBuffer *cbuf, bool do_size, bool is_load,
960                            int stack_offset, int reg, uint ireg, outputStream* st);
961
962static int vec_stack_to_stack_helper(CodeBuffer *cbuf, bool do_size, int src_offset,
963                                     int dst_offset, uint ireg, outputStream* st) {
964  int calc_size = 0;
965  int src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
966  int dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
967  switch (ireg) {
968  case Op_VecS:
969    calc_size = 3+src_offset_size + 3+dst_offset_size;
970    break;
971  case Op_VecD:
972    calc_size = 3+src_offset_size + 3+dst_offset_size;
973    src_offset += 4;
974    dst_offset += 4;
975    src_offset_size = (src_offset == 0) ? 0 : ((src_offset < 0x80) ? 1 : 4);
976    dst_offset_size = (dst_offset == 0) ? 0 : ((dst_offset < 0x80) ? 1 : 4);
977    calc_size += 3+src_offset_size + 3+dst_offset_size;
978    break;
979  case Op_VecX:
980  case Op_VecY:
981  case Op_VecZ:
982    calc_size = 6 + 6 + 5+src_offset_size + 5+dst_offset_size;
983    break;
984  default:
985    ShouldNotReachHere();
986  }
987  if (cbuf) {
988    MacroAssembler _masm(cbuf);
989    int offset = __ offset();
990    switch (ireg) {
991    case Op_VecS:
992      __ pushl(Address(rsp, src_offset));
993      __ popl (Address(rsp, dst_offset));
994      break;
995    case Op_VecD:
996      __ pushl(Address(rsp, src_offset));
997      __ popl (Address(rsp, dst_offset));
998      __ pushl(Address(rsp, src_offset+4));
999      __ popl (Address(rsp, dst_offset+4));
1000      break;
1001    case Op_VecX:
1002      __ movdqu(Address(rsp, -16), xmm0);
1003      __ movdqu(xmm0, Address(rsp, src_offset));
1004      __ movdqu(Address(rsp, dst_offset), xmm0);
1005      __ movdqu(xmm0, Address(rsp, -16));
1006      break;
1007    case Op_VecY:
1008      __ vmovdqu(Address(rsp, -32), xmm0);
1009      __ vmovdqu(xmm0, Address(rsp, src_offset));
1010      __ vmovdqu(Address(rsp, dst_offset), xmm0);
1011      __ vmovdqu(xmm0, Address(rsp, -32));
1012    case Op_VecZ:
1013      __ evmovdqul(Address(rsp, -64), xmm0, 2);
1014      __ evmovdqul(xmm0, Address(rsp, src_offset), 2);
1015      __ evmovdqul(Address(rsp, dst_offset), xmm0, 2);
1016      __ evmovdqul(xmm0, Address(rsp, -64), 2);
1017      break;
1018    default:
1019      ShouldNotReachHere();
1020    }
1021    int size = __ offset() - offset;
1022    assert(size == calc_size, "incorrect size calculattion");
1023    return size;
1024#ifndef PRODUCT
1025  } else if (!do_size) {
1026    switch (ireg) {
1027    case Op_VecS:
1028      st->print("pushl   [rsp + #%d]\t# 32-bit mem-mem spill\n\t"
1029                "popl    [rsp + #%d]",
1030                src_offset, dst_offset);
1031      break;
1032    case Op_VecD:
1033      st->print("pushl   [rsp + #%d]\t# 64-bit mem-mem spill\n\t"
1034                "popq    [rsp + #%d]\n\t"
1035                "pushl   [rsp + #%d]\n\t"
1036                "popq    [rsp + #%d]",
1037                src_offset, dst_offset, src_offset+4, dst_offset+4);
1038      break;
1039     case Op_VecX:
1040      st->print("movdqu  [rsp - #16], xmm0\t# 128-bit mem-mem spill\n\t"
1041                "movdqu  xmm0, [rsp + #%d]\n\t"
1042                "movdqu  [rsp + #%d], xmm0\n\t"
1043                "movdqu  xmm0, [rsp - #16]",
1044                src_offset, dst_offset);
1045      break;
1046    case Op_VecY:
1047      st->print("vmovdqu [rsp - #32], xmm0\t# 256-bit mem-mem spill\n\t"
1048                "vmovdqu xmm0, [rsp + #%d]\n\t"
1049                "vmovdqu [rsp + #%d], xmm0\n\t"
1050                "vmovdqu xmm0, [rsp - #32]",
1051                src_offset, dst_offset);
1052    case Op_VecZ:
1053      st->print("vmovdqu [rsp - #64], xmm0\t# 512-bit mem-mem spill\n\t"
1054                "vmovdqu xmm0, [rsp + #%d]\n\t"
1055                "vmovdqu [rsp + #%d], xmm0\n\t"
1056                "vmovdqu xmm0, [rsp - #64]",
1057                src_offset, dst_offset);
1058      break;
1059    default:
1060      ShouldNotReachHere();
1061    }
1062#endif
1063  }
1064  return calc_size;
1065}
1066
1067uint MachSpillCopyNode::implementation( CodeBuffer *cbuf, PhaseRegAlloc *ra_, bool do_size, outputStream* st ) const {
1068  // Get registers to move
1069  OptoReg::Name src_second = ra_->get_reg_second(in(1));
1070  OptoReg::Name src_first = ra_->get_reg_first(in(1));
1071  OptoReg::Name dst_second = ra_->get_reg_second(this );
1072  OptoReg::Name dst_first = ra_->get_reg_first(this );
1073
1074  enum RC src_second_rc = rc_class(src_second);
1075  enum RC src_first_rc = rc_class(src_first);
1076  enum RC dst_second_rc = rc_class(dst_second);
1077  enum RC dst_first_rc = rc_class(dst_first);
1078
1079  assert( OptoReg::is_valid(src_first) && OptoReg::is_valid(dst_first), "must move at least 1 register" );
1080
1081  // Generate spill code!
1082  int size = 0;
1083
1084  if( src_first == dst_first && src_second == dst_second )
1085    return size;            // Self copy, no move
1086
1087  if (bottom_type()->isa_vect() != NULL) {
1088    uint ireg = ideal_reg();
1089    assert((src_first_rc != rc_int && dst_first_rc != rc_int), "sanity");
1090    assert((src_first_rc != rc_float && dst_first_rc != rc_float), "sanity");
1091    assert((ireg == Op_VecS || ireg == Op_VecD || ireg == Op_VecX || ireg == Op_VecY || ireg == Op_VecZ ), "sanity");
1092    if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1093      // mem -> mem
1094      int src_offset = ra_->reg2offset(src_first);
1095      int dst_offset = ra_->reg2offset(dst_first);
1096      return vec_stack_to_stack_helper(cbuf, do_size, src_offset, dst_offset, ireg, st);
1097    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1098      return vec_mov_helper(cbuf, do_size, src_first, dst_first, src_second, dst_second, ireg, st);
1099    } else if (src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1100      int stack_offset = ra_->reg2offset(dst_first);
1101      return vec_spill_helper(cbuf, do_size, false, stack_offset, src_first, ireg, st);
1102    } else if (src_first_rc == rc_stack && dst_first_rc == rc_xmm ) {
1103      int stack_offset = ra_->reg2offset(src_first);
1104      return vec_spill_helper(cbuf, do_size, true,  stack_offset, dst_first, ireg, st);
1105    } else {
1106      ShouldNotReachHere();
1107    }
1108  }
1109
1110  // --------------------------------------
1111  // Check for mem-mem move.  push/pop to move.
1112  if( src_first_rc == rc_stack && dst_first_rc == rc_stack ) {
1113    if( src_second == dst_first ) { // overlapping stack copy ranges
1114      assert( src_second_rc == rc_stack && dst_second_rc == rc_stack, "we only expect a stk-stk copy here" );
1115      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1116      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1117      src_second_rc = dst_second_rc = rc_bad;  // flag as already moved the second bits
1118    }
1119    // move low bits
1120    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),ESI_num,0xFF,"PUSH  ",size, st);
1121    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),EAX_num,0x8F,"POP   ",size, st);
1122    if( src_second_rc == rc_stack && dst_second_rc == rc_stack ) { // mov second bits
1123      size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),ESI_num,0xFF,"PUSH  ",size, st);
1124      size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),EAX_num,0x8F,"POP   ",size, st);
1125    }
1126    return size;
1127  }
1128
1129  // --------------------------------------
1130  // Check for integer reg-reg copy
1131  if( src_first_rc == rc_int && dst_first_rc == rc_int )
1132    size = impl_mov_helper(cbuf,do_size,src_first,dst_first,size, st);
1133
1134  // Check for integer store
1135  if( src_first_rc == rc_int && dst_first_rc == rc_stack )
1136    size = impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first,0x89,"MOV ",size, st);
1137
1138  // Check for integer load
1139  if( dst_first_rc == rc_int && src_first_rc == rc_stack )
1140    size = impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first,0x8B,"MOV ",size, st);
1141
1142  // Check for integer reg-xmm reg copy
1143  if( src_first_rc == rc_int && dst_first_rc == rc_xmm ) {
1144    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1145            "no 64 bit integer-float reg moves" );
1146    return impl_movgpr2x_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1147  }
1148  // --------------------------------------
1149  // Check for float reg-reg copy
1150  if( src_first_rc == rc_float && dst_first_rc == rc_float ) {
1151    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1152            (src_first+1 == src_second && dst_first+1 == dst_second), "no non-adjacent float-moves" );
1153    if( cbuf ) {
1154
1155      // Note the mucking with the register encode to compensate for the 0/1
1156      // indexing issue mentioned in a comment in the reg_def sections
1157      // for FPR registers many lines above here.
1158
1159      if( src_first != FPR1L_num ) {
1160        emit_opcode  (*cbuf, 0xD9 );           // FLD    ST(i)
1161        emit_d8      (*cbuf, 0xC0+Matcher::_regEncode[src_first]-1 );
1162        emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1163        emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1164     } else {
1165        emit_opcode  (*cbuf, 0xDD );           // FST    ST(i)
1166        emit_d8      (*cbuf, 0xD0+Matcher::_regEncode[dst_first]-1 );
1167     }
1168#ifndef PRODUCT
1169    } else if( !do_size ) {
1170      if( size != 0 ) st->print("\n\t");
1171      if( src_first != FPR1L_num ) st->print("FLD    %s\n\tFSTP   %s",Matcher::regName[src_first],Matcher::regName[dst_first]);
1172      else                      st->print(             "FST    %s",                            Matcher::regName[dst_first]);
1173#endif
1174    }
1175    return size + ((src_first != FPR1L_num) ? 2+2 : 2);
1176  }
1177
1178  // Check for float store
1179  if( src_first_rc == rc_float && dst_first_rc == rc_stack ) {
1180    return impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,ra_->reg2offset(dst_first),size, st);
1181  }
1182
1183  // Check for float load
1184  if( dst_first_rc == rc_float && src_first_rc == rc_stack ) {
1185    int offset = ra_->reg2offset(src_first);
1186    const char *op_str;
1187    int op;
1188    if( src_first+1 == src_second && dst_first+1 == dst_second ) { // double load?
1189      op_str = "FLD_D";
1190      op = 0xDD;
1191    } else {                   // 32-bit load
1192      op_str = "FLD_S";
1193      op = 0xD9;
1194      assert( src_second_rc == rc_bad && dst_second_rc == rc_bad, "no non-adjacent float-loads" );
1195    }
1196    if( cbuf ) {
1197      emit_opcode  (*cbuf, op );
1198      encode_RegMem(*cbuf, 0x0, ESP_enc, 0x4, 0, offset, relocInfo::none);
1199      emit_opcode  (*cbuf, 0xDD );           // FSTP   ST(i)
1200      emit_d8      (*cbuf, 0xD8+Matcher::_regEncode[dst_first] );
1201#ifndef PRODUCT
1202    } else if( !do_size ) {
1203      if( size != 0 ) st->print("\n\t");
1204      st->print("%s  ST,[ESP + #%d]\n\tFSTP   %s",op_str, offset,Matcher::regName[dst_first]);
1205#endif
1206    }
1207    int offset_size = (offset == 0) ? 0 : ((offset <= 127) ? 1 : 4);
1208    return size + 3+offset_size+2;
1209  }
1210
1211  // Check for xmm reg-reg copy
1212  if( src_first_rc == rc_xmm && dst_first_rc == rc_xmm ) {
1213    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad) ||
1214            (src_first+1 == src_second && dst_first+1 == dst_second),
1215            "no non-adjacent float-moves" );
1216    return impl_movx_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1217  }
1218
1219  // Check for xmm reg-integer reg copy
1220  if( src_first_rc == rc_xmm && dst_first_rc == rc_int ) {
1221    assert( (src_second_rc == rc_bad && dst_second_rc == rc_bad),
1222            "no 64 bit float-integer reg moves" );
1223    return impl_movx2gpr_helper(cbuf,do_size,src_first,dst_first,src_second, dst_second, size, st);
1224  }
1225
1226  // Check for xmm store
1227  if( src_first_rc == rc_xmm && dst_first_rc == rc_stack ) {
1228    return impl_x_helper(cbuf,do_size,false,ra_->reg2offset(dst_first),src_first, src_second, size, st);
1229  }
1230
1231  // Check for float xmm load
1232  if( dst_first_rc == rc_xmm && src_first_rc == rc_stack ) {
1233    return impl_x_helper(cbuf,do_size,true ,ra_->reg2offset(src_first),dst_first, dst_second, size, st);
1234  }
1235
1236  // Copy from float reg to xmm reg
1237  if( dst_first_rc == rc_xmm && src_first_rc == rc_float ) {
1238    // copy to the top of stack from floating point reg
1239    // and use LEA to preserve flags
1240    if( cbuf ) {
1241      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP-8]
1242      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1243      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1244      emit_d8(*cbuf,0xF8);
1245#ifndef PRODUCT
1246    } else if( !do_size ) {
1247      if( size != 0 ) st->print("\n\t");
1248      st->print("LEA    ESP,[ESP-8]");
1249#endif
1250    }
1251    size += 4;
1252
1253    size = impl_fp_store_helper(cbuf,do_size,src_first,src_second,dst_first,dst_second,0,size, st);
1254
1255    // Copy from the temp memory to the xmm reg.
1256    size = impl_x_helper(cbuf,do_size,true ,0,dst_first, dst_second, size, st);
1257
1258    if( cbuf ) {
1259      emit_opcode(*cbuf,0x8D);  // LEA  ESP,[ESP+8]
1260      emit_rm(*cbuf, 0x1, ESP_enc, 0x04);
1261      emit_rm(*cbuf, 0x0, 0x04, ESP_enc);
1262      emit_d8(*cbuf,0x08);
1263#ifndef PRODUCT
1264    } else if( !do_size ) {
1265      if( size != 0 ) st->print("\n\t");
1266      st->print("LEA    ESP,[ESP+8]");
1267#endif
1268    }
1269    size += 4;
1270    return size;
1271  }
1272
1273  assert( size > 0, "missed a case" );
1274
1275  // --------------------------------------------------------------------
1276  // Check for second bits still needing moving.
1277  if( src_second == dst_second )
1278    return size;               // Self copy; no move
1279  assert( src_second_rc != rc_bad && dst_second_rc != rc_bad, "src_second & dst_second cannot be Bad" );
1280
1281  // Check for second word int-int move
1282  if( src_second_rc == rc_int && dst_second_rc == rc_int )
1283    return impl_mov_helper(cbuf,do_size,src_second,dst_second,size, st);
1284
1285  // Check for second word integer store
1286  if( src_second_rc == rc_int && dst_second_rc == rc_stack )
1287    return impl_helper(cbuf,do_size,false,ra_->reg2offset(dst_second),src_second,0x89,"MOV ",size, st);
1288
1289  // Check for second word integer load
1290  if( dst_second_rc == rc_int && src_second_rc == rc_stack )
1291    return impl_helper(cbuf,do_size,true ,ra_->reg2offset(src_second),dst_second,0x8B,"MOV ",size, st);
1292
1293
1294  Unimplemented();
1295  return 0; // Mute compiler
1296}
1297
1298#ifndef PRODUCT
1299void MachSpillCopyNode::format(PhaseRegAlloc *ra_, outputStream* st) const {
1300  implementation( NULL, ra_, false, st );
1301}
1302#endif
1303
1304void MachSpillCopyNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1305  implementation( &cbuf, ra_, false, NULL );
1306}
1307
1308uint MachSpillCopyNode::size(PhaseRegAlloc *ra_) const {
1309  return implementation( NULL, ra_, true, NULL );
1310}
1311
1312
1313//=============================================================================
1314#ifndef PRODUCT
1315void BoxLockNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1316  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1317  int reg = ra_->get_reg_first(this);
1318  st->print("LEA    %s,[ESP + #%d]",Matcher::regName[reg],offset);
1319}
1320#endif
1321
1322void BoxLockNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1323  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1324  int reg = ra_->get_encode(this);
1325  if( offset >= 128 ) {
1326    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1327    emit_rm(cbuf, 0x2, reg, 0x04);
1328    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1329    emit_d32(cbuf, offset);
1330  }
1331  else {
1332    emit_opcode(cbuf, 0x8D);      // LEA  reg,[SP+offset]
1333    emit_rm(cbuf, 0x1, reg, 0x04);
1334    emit_rm(cbuf, 0x0, 0x04, ESP_enc);
1335    emit_d8(cbuf, offset);
1336  }
1337}
1338
1339uint BoxLockNode::size(PhaseRegAlloc *ra_) const {
1340  int offset = ra_->reg2offset(in_RegMask(0).find_first_elem());
1341  if( offset >= 128 ) {
1342    return 7;
1343  }
1344  else {
1345    return 4;
1346  }
1347}
1348
1349//=============================================================================
1350#ifndef PRODUCT
1351void MachUEPNode::format( PhaseRegAlloc *ra_, outputStream* st ) const {
1352  st->print_cr(  "CMP    EAX,[ECX+4]\t# Inline cache check");
1353  st->print_cr("\tJNE    SharedRuntime::handle_ic_miss_stub");
1354  st->print_cr("\tNOP");
1355  st->print_cr("\tNOP");
1356  if( !OptoBreakpoint )
1357    st->print_cr("\tNOP");
1358}
1359#endif
1360
1361void MachUEPNode::emit(CodeBuffer &cbuf, PhaseRegAlloc *ra_) const {
1362  MacroAssembler masm(&cbuf);
1363#ifdef ASSERT
1364  uint insts_size = cbuf.insts_size();
1365#endif
1366  masm.cmpptr(rax, Address(rcx, oopDesc::klass_offset_in_bytes()));
1367  masm.jump_cc(Assembler::notEqual,
1368               RuntimeAddress(SharedRuntime::get_ic_miss_stub()));
1369  /* WARNING these NOPs are critical so that verified entry point is properly
1370     aligned for patching by NativeJump::patch_verified_entry() */
1371  int nops_cnt = 2;
1372  if( !OptoBreakpoint ) // Leave space for int3
1373     nops_cnt += 1;
1374  masm.nop(nops_cnt);
1375
1376  assert(cbuf.insts_size() - insts_size == size(ra_), "checking code size of inline cache node");
1377}
1378
1379uint MachUEPNode::size(PhaseRegAlloc *ra_) const {
1380  return OptoBreakpoint ? 11 : 12;
1381}
1382
1383
1384//=============================================================================
1385
1386int Matcher::regnum_to_fpu_offset(int regnum) {
1387  return regnum - 32; // The FP registers are in the second chunk
1388}
1389
1390// This is UltraSparc specific, true just means we have fast l2f conversion
1391const bool Matcher::convL2FSupported(void) {
1392  return true;
1393}
1394
1395// Is this branch offset short enough that a short branch can be used?
1396//
1397// NOTE: If the platform does not provide any short branch variants, then
1398//       this method should return false for offset 0.
1399bool Matcher::is_short_branch_offset(int rule, int br_size, int offset) {
1400  // The passed offset is relative to address of the branch.
1401  // On 86 a branch displacement is calculated relative to address
1402  // of a next instruction.
1403  offset -= br_size;
1404
1405  // the short version of jmpConUCF2 contains multiple branches,
1406  // making the reach slightly less
1407  if (rule == jmpConUCF2_rule)
1408    return (-126 <= offset && offset <= 125);
1409  return (-128 <= offset && offset <= 127);
1410}
1411
1412const bool Matcher::isSimpleConstant64(jlong value) {
1413  // Will one (StoreL ConL) be cheaper than two (StoreI ConI)?.
1414  return false;
1415}
1416
1417// The ecx parameter to rep stos for the ClearArray node is in dwords.
1418const bool Matcher::init_array_count_is_in_bytes = false;
1419
1420// Threshold size for cleararray.
1421const int Matcher::init_array_short_size = 8 * BytesPerLong;
1422
1423// Needs 2 CMOV's for longs.
1424const int Matcher::long_cmove_cost() { return 1; }
1425
1426// No CMOVF/CMOVD with SSE/SSE2
1427const int Matcher::float_cmove_cost() { return (UseSSE>=1) ? ConditionalMoveLimit : 0; }
1428
1429// Does the CPU require late expand (see block.cpp for description of late expand)?
1430const bool Matcher::require_postalloc_expand = false;
1431
1432// Should the Matcher clone shifts on addressing modes, expecting them to
1433// be subsumed into complex addressing expressions or compute them into
1434// registers?  True for Intel but false for most RISCs
1435const bool Matcher::clone_shift_expressions = true;
1436
1437// Do we need to mask the count passed to shift instructions or does
1438// the cpu only look at the lower 5/6 bits anyway?
1439const bool Matcher::need_masked_shift_count = false;
1440
1441bool Matcher::narrow_oop_use_complex_address() {
1442  ShouldNotCallThis();
1443  return true;
1444}
1445
1446bool Matcher::narrow_klass_use_complex_address() {
1447  ShouldNotCallThis();
1448  return true;
1449}
1450
1451
1452// Is it better to copy float constants, or load them directly from memory?
1453// Intel can load a float constant from a direct address, requiring no
1454// extra registers.  Most RISCs will have to materialize an address into a
1455// register first, so they would do better to copy the constant from stack.
1456const bool Matcher::rematerialize_float_constants = true;
1457
1458// If CPU can load and store mis-aligned doubles directly then no fixup is
1459// needed.  Else we split the double into 2 integer pieces and move it
1460// piece-by-piece.  Only happens when passing doubles into C code as the
1461// Java calling convention forces doubles to be aligned.
1462const bool Matcher::misaligned_doubles_ok = true;
1463
1464
1465void Matcher::pd_implicit_null_fixup(MachNode *node, uint idx) {
1466  // Get the memory operand from the node
1467  uint numopnds = node->num_opnds();        // Virtual call for number of operands
1468  uint skipped  = node->oper_input_base();  // Sum of leaves skipped so far
1469  assert( idx >= skipped, "idx too low in pd_implicit_null_fixup" );
1470  uint opcnt     = 1;                 // First operand
1471  uint num_edges = node->_opnds[1]->num_edges(); // leaves for first operand
1472  while( idx >= skipped+num_edges ) {
1473    skipped += num_edges;
1474    opcnt++;                          // Bump operand count
1475    assert( opcnt < numopnds, "Accessing non-existent operand" );
1476    num_edges = node->_opnds[opcnt]->num_edges(); // leaves for next operand
1477  }
1478
1479  MachOper *memory = node->_opnds[opcnt];
1480  MachOper *new_memory = NULL;
1481  switch (memory->opcode()) {
1482  case DIRECT:
1483  case INDOFFSET32X:
1484    // No transformation necessary.
1485    return;
1486  case INDIRECT:
1487    new_memory = new indirect_win95_safeOper( );
1488    break;
1489  case INDOFFSET8:
1490    new_memory = new indOffset8_win95_safeOper(memory->disp(NULL, NULL, 0));
1491    break;
1492  case INDOFFSET32:
1493    new_memory = new indOffset32_win95_safeOper(memory->disp(NULL, NULL, 0));
1494    break;
1495  case INDINDEXOFFSET:
1496    new_memory = new indIndexOffset_win95_safeOper(memory->disp(NULL, NULL, 0));
1497    break;
1498  case INDINDEXSCALE:
1499    new_memory = new indIndexScale_win95_safeOper(memory->scale());
1500    break;
1501  case INDINDEXSCALEOFFSET:
1502    new_memory = new indIndexScaleOffset_win95_safeOper(memory->scale(), memory->disp(NULL, NULL, 0));
1503    break;
1504  case LOAD_LONG_INDIRECT:
1505  case LOAD_LONG_INDOFFSET32:
1506    // Does not use EBP as address register, use { EDX, EBX, EDI, ESI}
1507    return;
1508  default:
1509    assert(false, "unexpected memory operand in pd_implicit_null_fixup()");
1510    return;
1511  }
1512  node->_opnds[opcnt] = new_memory;
1513}
1514
1515// Advertise here if the CPU requires explicit rounding operations
1516// to implement the UseStrictFP mode.
1517const bool Matcher::strict_fp_requires_explicit_rounding = true;
1518
1519// Are floats conerted to double when stored to stack during deoptimization?
1520// On x32 it is stored with convertion only when FPU is used for floats.
1521bool Matcher::float_in_double() { return (UseSSE == 0); }
1522
1523// Do ints take an entire long register or just half?
1524const bool Matcher::int_in_long = false;
1525
1526// Return whether or not this register is ever used as an argument.  This
1527// function is used on startup to build the trampoline stubs in generateOptoStub.
1528// Registers not mentioned will be killed by the VM call in the trampoline, and
1529// arguments in those registers not be available to the callee.
1530bool Matcher::can_be_java_arg( int reg ) {
1531  if(  reg == ECX_num   || reg == EDX_num   ) return true;
1532  if( (reg == XMM0_num  || reg == XMM1_num ) && UseSSE>=1 ) return true;
1533  if( (reg == XMM0b_num || reg == XMM1b_num) && UseSSE>=2 ) return true;
1534  return false;
1535}
1536
1537bool Matcher::is_spillable_arg( int reg ) {
1538  return can_be_java_arg(reg);
1539}
1540
1541bool Matcher::use_asm_for_ldiv_by_con( jlong divisor ) {
1542  // Use hardware integer DIV instruction when
1543  // it is faster than a code which use multiply.
1544  // Only when constant divisor fits into 32 bit
1545  // (min_jint is excluded to get only correct
1546  // positive 32 bit values from negative).
1547  return VM_Version::has_fast_idiv() &&
1548         (divisor == (int)divisor && divisor != min_jint);
1549}
1550
1551// Register for DIVI projection of divmodI
1552RegMask Matcher::divI_proj_mask() {
1553  return EAX_REG_mask();
1554}
1555
1556// Register for MODI projection of divmodI
1557RegMask Matcher::modI_proj_mask() {
1558  return EDX_REG_mask();
1559}
1560
1561// Register for DIVL projection of divmodL
1562RegMask Matcher::divL_proj_mask() {
1563  ShouldNotReachHere();
1564  return RegMask();
1565}
1566
1567// Register for MODL projection of divmodL
1568RegMask Matcher::modL_proj_mask() {
1569  ShouldNotReachHere();
1570  return RegMask();
1571}
1572
1573const RegMask Matcher::method_handle_invoke_SP_save_mask() {
1574  return NO_REG_mask();
1575}
1576
1577// Returns true if the high 32 bits of the value is known to be zero.
1578bool is_operand_hi32_zero(Node* n) {
1579  int opc = n->Opcode();
1580  if (opc == Op_AndL) {
1581    Node* o2 = n->in(2);
1582    if (o2->is_Con() && (o2->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1583      return true;
1584    }
1585  }
1586  if (opc == Op_ConL && (n->get_long() & 0xFFFFFFFF00000000LL) == 0LL) {
1587    return true;
1588  }
1589  return false;
1590}
1591
1592%}
1593
1594//----------ENCODING BLOCK-----------------------------------------------------
1595// This block specifies the encoding classes used by the compiler to output
1596// byte streams.  Encoding classes generate functions which are called by
1597// Machine Instruction Nodes in order to generate the bit encoding of the
1598// instruction.  Operands specify their base encoding interface with the
1599// interface keyword.  There are currently supported four interfaces,
1600// REG_INTER, CONST_INTER, MEMORY_INTER, & COND_INTER.  REG_INTER causes an
1601// operand to generate a function which returns its register number when
1602// queried.   CONST_INTER causes an operand to generate a function which
1603// returns the value of the constant when queried.  MEMORY_INTER causes an
1604// operand to generate four functions which return the Base Register, the
1605// Index Register, the Scale Value, and the Offset Value of the operand when
1606// queried.  COND_INTER causes an operand to generate six functions which
1607// return the encoding code (ie - encoding bits for the instruction)
1608// associated with each basic boolean condition for a conditional instruction.
1609// Instructions specify two basic values for encoding.  They use the
1610// ins_encode keyword to specify their encoding class (which must be one of
1611// the class names specified in the encoding block), and they use the
1612// opcode keyword to specify, in order, their primary, secondary, and
1613// tertiary opcode.  Only the opcode sections which a particular instruction
1614// needs for encoding need to be specified.
1615encode %{
1616  // Build emit functions for each basic byte or larger field in the intel
1617  // encoding scheme (opcode, rm, sib, immediate), and call them from C++
1618  // code in the enc_class source block.  Emit functions will live in the
1619  // main source block for now.  In future, we can generalize this by
1620  // adding a syntax that specifies the sizes of fields in an order,
1621  // so that the adlc can build the emit functions automagically
1622
1623  // Emit primary opcode
1624  enc_class OpcP %{
1625    emit_opcode(cbuf, $primary);
1626  %}
1627
1628  // Emit secondary opcode
1629  enc_class OpcS %{
1630    emit_opcode(cbuf, $secondary);
1631  %}
1632
1633  // Emit opcode directly
1634  enc_class Opcode(immI d8) %{
1635    emit_opcode(cbuf, $d8$$constant);
1636  %}
1637
1638  enc_class SizePrefix %{
1639    emit_opcode(cbuf,0x66);
1640  %}
1641
1642  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
1643    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1644  %}
1645
1646  enc_class OpcRegReg (immI opcode, rRegI dst, rRegI src) %{    // OpcRegReg(Many)
1647    emit_opcode(cbuf,$opcode$$constant);
1648    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
1649  %}
1650
1651  enc_class mov_r32_imm0( rRegI dst ) %{
1652    emit_opcode( cbuf, 0xB8 + $dst$$reg ); // 0xB8+ rd   -- MOV r32  ,imm32
1653    emit_d32   ( cbuf, 0x0  );             //                         imm32==0x0
1654  %}
1655
1656  enc_class cdq_enc %{
1657    // Full implementation of Java idiv and irem; checks for
1658    // special case as described in JVM spec., p.243 & p.271.
1659    //
1660    //         normal case                           special case
1661    //
1662    // input : rax,: dividend                         min_int
1663    //         reg: divisor                          -1
1664    //
1665    // output: rax,: quotient  (= rax, idiv reg)       min_int
1666    //         rdx: remainder (= rax, irem reg)       0
1667    //
1668    //  Code sequnce:
1669    //
1670    //  81 F8 00 00 00 80    cmp         rax,80000000h
1671    //  0F 85 0B 00 00 00    jne         normal_case
1672    //  33 D2                xor         rdx,edx
1673    //  83 F9 FF             cmp         rcx,0FFh
1674    //  0F 84 03 00 00 00    je          done
1675    //                  normal_case:
1676    //  99                   cdq
1677    //  F7 F9                idiv        rax,ecx
1678    //                  done:
1679    //
1680    emit_opcode(cbuf,0x81); emit_d8(cbuf,0xF8);
1681    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);
1682    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x80);                     // cmp rax,80000000h
1683    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x85);
1684    emit_opcode(cbuf,0x0B); emit_d8(cbuf,0x00);
1685    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // jne normal_case
1686    emit_opcode(cbuf,0x33); emit_d8(cbuf,0xD2);                     // xor rdx,edx
1687    emit_opcode(cbuf,0x83); emit_d8(cbuf,0xF9); emit_d8(cbuf,0xFF); // cmp rcx,0FFh
1688    emit_opcode(cbuf,0x0F); emit_d8(cbuf,0x84);
1689    emit_opcode(cbuf,0x03); emit_d8(cbuf,0x00);
1690    emit_opcode(cbuf,0x00); emit_d8(cbuf,0x00);                     // je done
1691    // normal_case:
1692    emit_opcode(cbuf,0x99);                                         // cdq
1693    // idiv (note: must be emitted by the user of this rule)
1694    // normal:
1695  %}
1696
1697  // Dense encoding for older common ops
1698  enc_class Opc_plus(immI opcode, rRegI reg) %{
1699    emit_opcode(cbuf, $opcode$$constant + $reg$$reg);
1700  %}
1701
1702
1703  // Opcde enc_class for 8/32 bit immediate instructions with sign-extension
1704  enc_class OpcSE (immI imm) %{ // Emit primary opcode and set sign-extend bit
1705    // Check for 8-bit immediate, and set sign extend bit in opcode
1706    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1707      emit_opcode(cbuf, $primary | 0x02);
1708    }
1709    else {                          // If 32-bit immediate
1710      emit_opcode(cbuf, $primary);
1711    }
1712  %}
1713
1714  enc_class OpcSErm (rRegI dst, immI imm) %{    // OpcSEr/m
1715    // Emit primary opcode and set sign-extend bit
1716    // Check for 8-bit immediate, and set sign extend bit in opcode
1717    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1718      emit_opcode(cbuf, $primary | 0x02);    }
1719    else {                          // If 32-bit immediate
1720      emit_opcode(cbuf, $primary);
1721    }
1722    // Emit r/m byte with secondary opcode, after primary opcode.
1723    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1724  %}
1725
1726  enc_class Con8or32 (immI imm) %{    // Con8or32(storeImmI), 8 or 32 bits
1727    // Check for 8-bit immediate, and set sign extend bit in opcode
1728    if (($imm$$constant >= -128) && ($imm$$constant <= 127)) {
1729      $$$emit8$imm$$constant;
1730    }
1731    else {                          // If 32-bit immediate
1732      // Output immediate
1733      $$$emit32$imm$$constant;
1734    }
1735  %}
1736
1737  enc_class Long_OpcSErm_Lo(eRegL dst, immL imm) %{
1738    // Emit primary opcode and set sign-extend bit
1739    // Check for 8-bit immediate, and set sign extend bit in opcode
1740    int con = (int)$imm$$constant; // Throw away top bits
1741    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1742    // Emit r/m byte with secondary opcode, after primary opcode.
1743    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1744    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1745    else                               emit_d32(cbuf,con);
1746  %}
1747
1748  enc_class Long_OpcSErm_Hi(eRegL dst, immL imm) %{
1749    // Emit primary opcode and set sign-extend bit
1750    // Check for 8-bit immediate, and set sign extend bit in opcode
1751    int con = (int)($imm$$constant >> 32); // Throw away bottom bits
1752    emit_opcode(cbuf, ((con >= -128) && (con <= 127)) ? ($primary | 0x02) : $primary);
1753    // Emit r/m byte with tertiary opcode, after primary opcode.
1754    emit_rm(cbuf, 0x3, $tertiary, HIGH_FROM_LOW($dst$$reg));
1755    if ((con >= -128) && (con <= 127)) emit_d8 (cbuf,con);
1756    else                               emit_d32(cbuf,con);
1757  %}
1758
1759  enc_class OpcSReg (rRegI dst) %{    // BSWAP
1760    emit_cc(cbuf, $secondary, $dst$$reg );
1761  %}
1762
1763  enc_class bswap_long_bytes(eRegL dst) %{ // BSWAP
1764    int destlo = $dst$$reg;
1765    int desthi = HIGH_FROM_LOW(destlo);
1766    // bswap lo
1767    emit_opcode(cbuf, 0x0F);
1768    emit_cc(cbuf, 0xC8, destlo);
1769    // bswap hi
1770    emit_opcode(cbuf, 0x0F);
1771    emit_cc(cbuf, 0xC8, desthi);
1772    // xchg lo and hi
1773    emit_opcode(cbuf, 0x87);
1774    emit_rm(cbuf, 0x3, destlo, desthi);
1775  %}
1776
1777  enc_class RegOpc (rRegI div) %{    // IDIV, IMOD, JMP indirect, ...
1778    emit_rm(cbuf, 0x3, $secondary, $div$$reg );
1779  %}
1780
1781  enc_class enc_cmov(cmpOp cop ) %{ // CMOV
1782    $$$emit8$primary;
1783    emit_cc(cbuf, $secondary, $cop$$cmpcode);
1784  %}
1785
1786  enc_class enc_cmov_dpr(cmpOp cop, regDPR src ) %{ // CMOV
1787    int op = 0xDA00 + $cop$$cmpcode + ($src$$reg-1);
1788    emit_d8(cbuf, op >> 8 );
1789    emit_d8(cbuf, op & 255);
1790  %}
1791
1792  // emulate a CMOV with a conditional branch around a MOV
1793  enc_class enc_cmov_branch( cmpOp cop, immI brOffs ) %{ // CMOV
1794    // Invert sense of branch from sense of CMOV
1795    emit_cc( cbuf, 0x70, ($cop$$cmpcode^1) );
1796    emit_d8( cbuf, $brOffs$$constant );
1797  %}
1798
1799  enc_class enc_PartialSubtypeCheck( ) %{
1800    Register Redi = as_Register(EDI_enc); // result register
1801    Register Reax = as_Register(EAX_enc); // super class
1802    Register Recx = as_Register(ECX_enc); // killed
1803    Register Resi = as_Register(ESI_enc); // sub class
1804    Label miss;
1805
1806    MacroAssembler _masm(&cbuf);
1807    __ check_klass_subtype_slow_path(Resi, Reax, Recx, Redi,
1808                                     NULL, &miss,
1809                                     /*set_cond_codes:*/ true);
1810    if ($primary) {
1811      __ xorptr(Redi, Redi);
1812    }
1813    __ bind(miss);
1814  %}
1815
1816  enc_class FFree_Float_Stack_All %{    // Free_Float_Stack_All
1817    MacroAssembler masm(&cbuf);
1818    int start = masm.offset();
1819    if (UseSSE >= 2) {
1820      if (VerifyFPU) {
1821        masm.verify_FPU(0, "must be empty in SSE2+ mode");
1822      }
1823    } else {
1824      // External c_calling_convention expects the FPU stack to be 'clean'.
1825      // Compiled code leaves it dirty.  Do cleanup now.
1826      masm.empty_FPU_stack();
1827    }
1828    if (sizeof_FFree_Float_Stack_All == -1) {
1829      sizeof_FFree_Float_Stack_All = masm.offset() - start;
1830    } else {
1831      assert(masm.offset() - start == sizeof_FFree_Float_Stack_All, "wrong size");
1832    }
1833  %}
1834
1835  enc_class Verify_FPU_For_Leaf %{
1836    if( VerifyFPU ) {
1837      MacroAssembler masm(&cbuf);
1838      masm.verify_FPU( -3, "Returning from Runtime Leaf call");
1839    }
1840  %}
1841
1842  enc_class Java_To_Runtime (method meth) %{    // CALL Java_To_Runtime, Java_To_Runtime_Leaf
1843    // This is the instruction starting address for relocation info.
1844    cbuf.set_insts_mark();
1845    $$$emit8$primary;
1846    // CALL directly to the runtime
1847    emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1848                runtime_call_Relocation::spec(), RELOC_IMM32 );
1849
1850    if (UseSSE >= 2) {
1851      MacroAssembler _masm(&cbuf);
1852      BasicType rt = tf()->return_type();
1853
1854      if ((rt == T_FLOAT || rt == T_DOUBLE) && !return_value_is_used()) {
1855        // A C runtime call where the return value is unused.  In SSE2+
1856        // mode the result needs to be removed from the FPU stack.  It's
1857        // likely that this function call could be removed by the
1858        // optimizer if the C function is a pure function.
1859        __ ffree(0);
1860      } else if (rt == T_FLOAT) {
1861        __ lea(rsp, Address(rsp, -4));
1862        __ fstp_s(Address(rsp, 0));
1863        __ movflt(xmm0, Address(rsp, 0));
1864        __ lea(rsp, Address(rsp,  4));
1865      } else if (rt == T_DOUBLE) {
1866        __ lea(rsp, Address(rsp, -8));
1867        __ fstp_d(Address(rsp, 0));
1868        __ movdbl(xmm0, Address(rsp, 0));
1869        __ lea(rsp, Address(rsp,  8));
1870      }
1871    }
1872  %}
1873
1874
1875  enc_class pre_call_resets %{
1876    // If method sets FPU control word restore it here
1877    debug_only(int off0 = cbuf.insts_size());
1878    if (ra_->C->in_24_bit_fp_mode()) {
1879      MacroAssembler _masm(&cbuf);
1880      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
1881    }
1882    if (ra_->C->max_vector_size() > 16) {
1883      // Clear upper bits of YMM registers when current compiled code uses
1884      // wide vectors to avoid AVX <-> SSE transition penalty during call.
1885      MacroAssembler _masm(&cbuf);
1886      __ vzeroupper();
1887    }
1888    debug_only(int off1 = cbuf.insts_size());
1889    assert(off1 - off0 == pre_call_resets_size(), "correct size prediction");
1890  %}
1891
1892  enc_class post_call_FPU %{
1893    // If method sets FPU control word do it here also
1894    if (Compile::current()->in_24_bit_fp_mode()) {
1895      MacroAssembler masm(&cbuf);
1896      masm.fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
1897    }
1898  %}
1899
1900  enc_class Java_Static_Call (method meth) %{    // JAVA STATIC CALL
1901    // CALL to fixup routine.  Fixup routine uses ScopeDesc info to determine
1902    // who we intended to call.
1903    cbuf.set_insts_mark();
1904    $$$emit8$primary;
1905
1906    if (!_method) {
1907      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1908                     runtime_call_Relocation::spec(),
1909                     RELOC_IMM32);
1910    } else {
1911      int method_index = resolved_method_index(cbuf);
1912      RelocationHolder rspec = _optimized_virtual ? opt_virtual_call_Relocation::spec(method_index)
1913                                                  : static_call_Relocation::spec(method_index);
1914      emit_d32_reloc(cbuf, ($meth$$method - (int)(cbuf.insts_end()) - 4),
1915                     rspec, RELOC_DISP32);
1916      // Emit stubs for static call.
1917      address stub = CompiledStaticCall::emit_to_interp_stub(cbuf);
1918      if (stub == NULL) {
1919        ciEnv::current()->record_failure("CodeCache is full");
1920        return;
1921      }
1922    }
1923  %}
1924
1925  enc_class Java_Dynamic_Call (method meth) %{    // JAVA DYNAMIC CALL
1926    MacroAssembler _masm(&cbuf);
1927    __ ic_call((address)$meth$$method, resolved_method_index(cbuf));
1928  %}
1929
1930  enc_class Java_Compiled_Call (method meth) %{    // JAVA COMPILED CALL
1931    int disp = in_bytes(Method::from_compiled_offset());
1932    assert( -128 <= disp && disp <= 127, "compiled_code_offset isn't small");
1933
1934    // CALL *[EAX+in_bytes(Method::from_compiled_code_entry_point_offset())]
1935    cbuf.set_insts_mark();
1936    $$$emit8$primary;
1937    emit_rm(cbuf, 0x01, $secondary, EAX_enc );  // R/M byte
1938    emit_d8(cbuf, disp);             // Displacement
1939
1940  %}
1941
1942//   Following encoding is no longer used, but may be restored if calling
1943//   convention changes significantly.
1944//   Became: Xor_Reg(EBP), Java_To_Runtime( labl )
1945//
1946//   enc_class Java_Interpreter_Call (label labl) %{    // JAVA INTERPRETER CALL
1947//     // int ic_reg     = Matcher::inline_cache_reg();
1948//     // int ic_encode  = Matcher::_regEncode[ic_reg];
1949//     // int imo_reg    = Matcher::interpreter_method_oop_reg();
1950//     // int imo_encode = Matcher::_regEncode[imo_reg];
1951//
1952//     // // Interpreter expects method_oop in EBX, currently a callee-saved register,
1953//     // // so we load it immediately before the call
1954//     // emit_opcode(cbuf, 0x8B);                     // MOV    imo_reg,ic_reg  # method_oop
1955//     // emit_rm(cbuf, 0x03, imo_encode, ic_encode ); // R/M byte
1956//
1957//     // xor rbp,ebp
1958//     emit_opcode(cbuf, 0x33);
1959//     emit_rm(cbuf, 0x3, EBP_enc, EBP_enc);
1960//
1961//     // CALL to interpreter.
1962//     cbuf.set_insts_mark();
1963//     $$$emit8$primary;
1964//     emit_d32_reloc(cbuf, ($labl$$label - (int)(cbuf.insts_end()) - 4),
1965//                 runtime_call_Relocation::spec(), RELOC_IMM32 );
1966//   %}
1967
1968  enc_class RegOpcImm (rRegI dst, immI8 shift) %{    // SHL, SAR, SHR
1969    $$$emit8$primary;
1970    emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
1971    $$$emit8$shift$$constant;
1972  %}
1973
1974  enc_class LdImmI (rRegI dst, immI src) %{    // Load Immediate
1975    // Load immediate does not have a zero or sign extended version
1976    // for 8-bit immediates
1977    emit_opcode(cbuf, 0xB8 + $dst$$reg);
1978    $$$emit32$src$$constant;
1979  %}
1980
1981  enc_class LdImmP (rRegI dst, immI src) %{    // Load Immediate
1982    // Load immediate does not have a zero or sign extended version
1983    // for 8-bit immediates
1984    emit_opcode(cbuf, $primary + $dst$$reg);
1985    $$$emit32$src$$constant;
1986  %}
1987
1988  enc_class LdImmL_Lo( eRegL dst, immL src) %{    // Load Immediate
1989    // Load immediate does not have a zero or sign extended version
1990    // for 8-bit immediates
1991    int dst_enc = $dst$$reg;
1992    int src_con = $src$$constant & 0x0FFFFFFFFL;
1993    if (src_con == 0) {
1994      // xor dst, dst
1995      emit_opcode(cbuf, 0x33);
1996      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
1997    } else {
1998      emit_opcode(cbuf, $primary + dst_enc);
1999      emit_d32(cbuf, src_con);
2000    }
2001  %}
2002
2003  enc_class LdImmL_Hi( eRegL dst, immL src) %{    // Load Immediate
2004    // Load immediate does not have a zero or sign extended version
2005    // for 8-bit immediates
2006    int dst_enc = $dst$$reg + 2;
2007    int src_con = ((julong)($src$$constant)) >> 32;
2008    if (src_con == 0) {
2009      // xor dst, dst
2010      emit_opcode(cbuf, 0x33);
2011      emit_rm(cbuf, 0x3, dst_enc, dst_enc);
2012    } else {
2013      emit_opcode(cbuf, $primary + dst_enc);
2014      emit_d32(cbuf, src_con);
2015    }
2016  %}
2017
2018
2019  // Encode a reg-reg copy.  If it is useless, then empty encoding.
2020  enc_class enc_Copy( rRegI dst, rRegI src ) %{
2021    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2022  %}
2023
2024  enc_class enc_CopyL_Lo( rRegI dst, eRegL src ) %{
2025    encode_Copy( cbuf, $dst$$reg, $src$$reg );
2026  %}
2027
2028  enc_class RegReg (rRegI dst, rRegI src) %{    // RegReg(Many)
2029    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2030  %}
2031
2032  enc_class RegReg_Lo(eRegL dst, eRegL src) %{    // RegReg(Many)
2033    $$$emit8$primary;
2034    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2035  %}
2036
2037  enc_class RegReg_Hi(eRegL dst, eRegL src) %{    // RegReg(Many)
2038    $$$emit8$secondary;
2039    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2040  %}
2041
2042  enc_class RegReg_Lo2(eRegL dst, eRegL src) %{    // RegReg(Many)
2043    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2044  %}
2045
2046  enc_class RegReg_Hi2(eRegL dst, eRegL src) %{    // RegReg(Many)
2047    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg));
2048  %}
2049
2050  enc_class RegReg_HiLo( eRegL src, rRegI dst ) %{
2051    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($src$$reg));
2052  %}
2053
2054  enc_class Con32 (immI src) %{    // Con32(storeImmI)
2055    // Output immediate
2056    $$$emit32$src$$constant;
2057  %}
2058
2059  enc_class Con32FPR_as_bits(immFPR src) %{        // storeF_imm
2060    // Output Float immediate bits
2061    jfloat jf = $src$$constant;
2062    int    jf_as_bits = jint_cast( jf );
2063    emit_d32(cbuf, jf_as_bits);
2064  %}
2065
2066  enc_class Con32F_as_bits(immF src) %{      // storeX_imm
2067    // Output Float immediate bits
2068    jfloat jf = $src$$constant;
2069    int    jf_as_bits = jint_cast( jf );
2070    emit_d32(cbuf, jf_as_bits);
2071  %}
2072
2073  enc_class Con16 (immI src) %{    // Con16(storeImmI)
2074    // Output immediate
2075    $$$emit16$src$$constant;
2076  %}
2077
2078  enc_class Con_d32(immI src) %{
2079    emit_d32(cbuf,$src$$constant);
2080  %}
2081
2082  enc_class conmemref (eRegP t1) %{    // Con32(storeImmI)
2083    // Output immediate memory reference
2084    emit_rm(cbuf, 0x00, $t1$$reg, 0x05 );
2085    emit_d32(cbuf, 0x00);
2086  %}
2087
2088  enc_class lock_prefix( ) %{
2089    if( os::is_MP() )
2090      emit_opcode(cbuf,0xF0);         // [Lock]
2091  %}
2092
2093  // Cmp-xchg long value.
2094  // Note: we need to swap rbx, and rcx before and after the
2095  //       cmpxchg8 instruction because the instruction uses
2096  //       rcx as the high order word of the new value to store but
2097  //       our register encoding uses rbx,.
2098  enc_class enc_cmpxchg8(eSIRegP mem_ptr) %{
2099
2100    // XCHG  rbx,ecx
2101    emit_opcode(cbuf,0x87);
2102    emit_opcode(cbuf,0xD9);
2103    // [Lock]
2104    if( os::is_MP() )
2105      emit_opcode(cbuf,0xF0);
2106    // CMPXCHG8 [Eptr]
2107    emit_opcode(cbuf,0x0F);
2108    emit_opcode(cbuf,0xC7);
2109    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2110    // XCHG  rbx,ecx
2111    emit_opcode(cbuf,0x87);
2112    emit_opcode(cbuf,0xD9);
2113  %}
2114
2115  enc_class enc_cmpxchg(eSIRegP mem_ptr) %{
2116    // [Lock]
2117    if( os::is_MP() )
2118      emit_opcode(cbuf,0xF0);
2119
2120    // CMPXCHG [Eptr]
2121    emit_opcode(cbuf,0x0F);
2122    emit_opcode(cbuf,0xB1);
2123    emit_rm( cbuf, 0x0, 1, $mem_ptr$$reg );
2124  %}
2125
2126  enc_class enc_flags_ne_to_boolean( iRegI res ) %{
2127    int res_encoding = $res$$reg;
2128
2129    // MOV  res,0
2130    emit_opcode( cbuf, 0xB8 + res_encoding);
2131    emit_d32( cbuf, 0 );
2132    // JNE,s  fail
2133    emit_opcode(cbuf,0x75);
2134    emit_d8(cbuf, 5 );
2135    // MOV  res,1
2136    emit_opcode( cbuf, 0xB8 + res_encoding);
2137    emit_d32( cbuf, 1 );
2138    // fail:
2139  %}
2140
2141  enc_class set_instruction_start( ) %{
2142    cbuf.set_insts_mark();            // Mark start of opcode for reloc info in mem operand
2143  %}
2144
2145  enc_class RegMem (rRegI ereg, memory mem) %{    // emit_reg_mem
2146    int reg_encoding = $ereg$$reg;
2147    int base  = $mem$$base;
2148    int index = $mem$$index;
2149    int scale = $mem$$scale;
2150    int displace = $mem$$disp;
2151    relocInfo::relocType disp_reloc = $mem->disp_reloc();
2152    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2153  %}
2154
2155  enc_class RegMem_Hi(eRegL ereg, memory mem) %{    // emit_reg_mem
2156    int reg_encoding = HIGH_FROM_LOW($ereg$$reg);  // Hi register of pair, computed from lo
2157    int base  = $mem$$base;
2158    int index = $mem$$index;
2159    int scale = $mem$$scale;
2160    int displace = $mem$$disp + 4;      // Offset is 4 further in memory
2161    assert( $mem->disp_reloc() == relocInfo::none, "Cannot add 4 to oop" );
2162    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, relocInfo::none);
2163  %}
2164
2165  enc_class move_long_small_shift( eRegL dst, immI_1_31 cnt ) %{
2166    int r1, r2;
2167    if( $tertiary == 0xA4 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2168    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2169    emit_opcode(cbuf,0x0F);
2170    emit_opcode(cbuf,$tertiary);
2171    emit_rm(cbuf, 0x3, r1, r2);
2172    emit_d8(cbuf,$cnt$$constant);
2173    emit_d8(cbuf,$primary);
2174    emit_rm(cbuf, 0x3, $secondary, r1);
2175    emit_d8(cbuf,$cnt$$constant);
2176  %}
2177
2178  enc_class move_long_big_shift_sign( eRegL dst, immI_32_63 cnt ) %{
2179    emit_opcode( cbuf, 0x8B ); // Move
2180    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2181    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2182      emit_d8(cbuf,$primary);
2183      emit_rm(cbuf, 0x3, $secondary, $dst$$reg);
2184      emit_d8(cbuf,$cnt$$constant-32);
2185    }
2186    emit_d8(cbuf,$primary);
2187    emit_rm(cbuf, 0x3, $secondary, HIGH_FROM_LOW($dst$$reg));
2188    emit_d8(cbuf,31);
2189  %}
2190
2191  enc_class move_long_big_shift_clr( eRegL dst, immI_32_63 cnt ) %{
2192    int r1, r2;
2193    if( $secondary == 0x5 ) { r1 = $dst$$reg;  r2 = HIGH_FROM_LOW($dst$$reg); }
2194    else                    { r2 = $dst$$reg;  r1 = HIGH_FROM_LOW($dst$$reg); }
2195
2196    emit_opcode( cbuf, 0x8B ); // Move r1,r2
2197    emit_rm(cbuf, 0x3, r1, r2);
2198    if( $cnt$$constant > 32 ) { // Shift, if not by zero
2199      emit_opcode(cbuf,$primary);
2200      emit_rm(cbuf, 0x3, $secondary, r1);
2201      emit_d8(cbuf,$cnt$$constant-32);
2202    }
2203    emit_opcode(cbuf,0x33);  // XOR r2,r2
2204    emit_rm(cbuf, 0x3, r2, r2);
2205  %}
2206
2207  // Clone of RegMem but accepts an extra parameter to access each
2208  // half of a double in memory; it never needs relocation info.
2209  enc_class Mov_MemD_half_to_Reg (immI opcode, memory mem, immI disp_for_half, rRegI rm_reg) %{
2210    emit_opcode(cbuf,$opcode$$constant);
2211    int reg_encoding = $rm_reg$$reg;
2212    int base     = $mem$$base;
2213    int index    = $mem$$index;
2214    int scale    = $mem$$scale;
2215    int displace = $mem$$disp + $disp_for_half$$constant;
2216    relocInfo::relocType disp_reloc = relocInfo::none;
2217    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2218  %}
2219
2220  // !!!!! Special Custom Code used by MemMove, and stack access instructions !!!!!
2221  //
2222  // Clone of RegMem except the RM-byte's reg/opcode field is an ADLC-time constant
2223  // and it never needs relocation information.
2224  // Frequently used to move data between FPU's Stack Top and memory.
2225  enc_class RMopc_Mem_no_oop (immI rm_opcode, memory mem) %{
2226    int rm_byte_opcode = $rm_opcode$$constant;
2227    int base     = $mem$$base;
2228    int index    = $mem$$index;
2229    int scale    = $mem$$scale;
2230    int displace = $mem$$disp;
2231    assert( $mem->disp_reloc() == relocInfo::none, "No oops here because no reloc info allowed" );
2232    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, relocInfo::none);
2233  %}
2234
2235  enc_class RMopc_Mem (immI rm_opcode, memory mem) %{
2236    int rm_byte_opcode = $rm_opcode$$constant;
2237    int base     = $mem$$base;
2238    int index    = $mem$$index;
2239    int scale    = $mem$$scale;
2240    int displace = $mem$$disp;
2241    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2242    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
2243  %}
2244
2245  enc_class RegLea (rRegI dst, rRegI src0, immI src1 ) %{    // emit_reg_lea
2246    int reg_encoding = $dst$$reg;
2247    int base         = $src0$$reg;      // 0xFFFFFFFF indicates no base
2248    int index        = 0x04;            // 0x04 indicates no index
2249    int scale        = 0x00;            // 0x00 indicates no scale
2250    int displace     = $src1$$constant; // 0x00 indicates no displacement
2251    relocInfo::relocType disp_reloc = relocInfo::none;
2252    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2253  %}
2254
2255  enc_class min_enc (rRegI dst, rRegI src) %{    // MIN
2256    // Compare dst,src
2257    emit_opcode(cbuf,0x3B);
2258    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2259    // jmp dst < src around move
2260    emit_opcode(cbuf,0x7C);
2261    emit_d8(cbuf,2);
2262    // move dst,src
2263    emit_opcode(cbuf,0x8B);
2264    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2265  %}
2266
2267  enc_class max_enc (rRegI dst, rRegI src) %{    // MAX
2268    // Compare dst,src
2269    emit_opcode(cbuf,0x3B);
2270    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2271    // jmp dst > src around move
2272    emit_opcode(cbuf,0x7F);
2273    emit_d8(cbuf,2);
2274    // move dst,src
2275    emit_opcode(cbuf,0x8B);
2276    emit_rm(cbuf, 0x3, $dst$$reg, $src$$reg);
2277  %}
2278
2279  enc_class enc_FPR_store(memory mem, regDPR src) %{
2280    // If src is FPR1, we can just FST to store it.
2281    // Else we need to FLD it to FPR1, then FSTP to store/pop it.
2282    int reg_encoding = 0x2; // Just store
2283    int base  = $mem$$base;
2284    int index = $mem$$index;
2285    int scale = $mem$$scale;
2286    int displace = $mem$$disp;
2287    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
2288    if( $src$$reg != FPR1L_enc ) {
2289      reg_encoding = 0x3;  // Store & pop
2290      emit_opcode( cbuf, 0xD9 ); // FLD (i.e., push it)
2291      emit_d8( cbuf, 0xC0-1+$src$$reg );
2292    }
2293    cbuf.set_insts_mark();       // Mark start of opcode for reloc info in mem operand
2294    emit_opcode(cbuf,$primary);
2295    encode_RegMem(cbuf, reg_encoding, base, index, scale, displace, disp_reloc);
2296  %}
2297
2298  enc_class neg_reg(rRegI dst) %{
2299    // NEG $dst
2300    emit_opcode(cbuf,0xF7);
2301    emit_rm(cbuf, 0x3, 0x03, $dst$$reg );
2302  %}
2303
2304  enc_class setLT_reg(eCXRegI dst) %{
2305    // SETLT $dst
2306    emit_opcode(cbuf,0x0F);
2307    emit_opcode(cbuf,0x9C);
2308    emit_rm( cbuf, 0x3, 0x4, $dst$$reg );
2309  %}
2310
2311  enc_class enc_cmpLTP(ncxRegI p, ncxRegI q, ncxRegI y, eCXRegI tmp) %{    // cadd_cmpLT
2312    int tmpReg = $tmp$$reg;
2313
2314    // SUB $p,$q
2315    emit_opcode(cbuf,0x2B);
2316    emit_rm(cbuf, 0x3, $p$$reg, $q$$reg);
2317    // SBB $tmp,$tmp
2318    emit_opcode(cbuf,0x1B);
2319    emit_rm(cbuf, 0x3, tmpReg, tmpReg);
2320    // AND $tmp,$y
2321    emit_opcode(cbuf,0x23);
2322    emit_rm(cbuf, 0x3, tmpReg, $y$$reg);
2323    // ADD $p,$tmp
2324    emit_opcode(cbuf,0x03);
2325    emit_rm(cbuf, 0x3, $p$$reg, tmpReg);
2326  %}
2327
2328  enc_class shift_left_long( eRegL dst, eCXRegI shift ) %{
2329    // TEST shift,32
2330    emit_opcode(cbuf,0xF7);
2331    emit_rm(cbuf, 0x3, 0, ECX_enc);
2332    emit_d32(cbuf,0x20);
2333    // JEQ,s small
2334    emit_opcode(cbuf, 0x74);
2335    emit_d8(cbuf, 0x04);
2336    // MOV    $dst.hi,$dst.lo
2337    emit_opcode( cbuf, 0x8B );
2338    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2339    // CLR    $dst.lo
2340    emit_opcode(cbuf, 0x33);
2341    emit_rm(cbuf, 0x3, $dst$$reg, $dst$$reg);
2342// small:
2343    // SHLD   $dst.hi,$dst.lo,$shift
2344    emit_opcode(cbuf,0x0F);
2345    emit_opcode(cbuf,0xA5);
2346    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg));
2347    // SHL    $dst.lo,$shift"
2348    emit_opcode(cbuf,0xD3);
2349    emit_rm(cbuf, 0x3, 0x4, $dst$$reg );
2350  %}
2351
2352  enc_class shift_right_long( eRegL dst, eCXRegI shift ) %{
2353    // TEST shift,32
2354    emit_opcode(cbuf,0xF7);
2355    emit_rm(cbuf, 0x3, 0, ECX_enc);
2356    emit_d32(cbuf,0x20);
2357    // JEQ,s small
2358    emit_opcode(cbuf, 0x74);
2359    emit_d8(cbuf, 0x04);
2360    // MOV    $dst.lo,$dst.hi
2361    emit_opcode( cbuf, 0x8B );
2362    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2363    // CLR    $dst.hi
2364    emit_opcode(cbuf, 0x33);
2365    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($dst$$reg));
2366// small:
2367    // SHRD   $dst.lo,$dst.hi,$shift
2368    emit_opcode(cbuf,0x0F);
2369    emit_opcode(cbuf,0xAD);
2370    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2371    // SHR    $dst.hi,$shift"
2372    emit_opcode(cbuf,0xD3);
2373    emit_rm(cbuf, 0x3, 0x5, HIGH_FROM_LOW($dst$$reg) );
2374  %}
2375
2376  enc_class shift_right_arith_long( eRegL dst, eCXRegI shift ) %{
2377    // TEST shift,32
2378    emit_opcode(cbuf,0xF7);
2379    emit_rm(cbuf, 0x3, 0, ECX_enc);
2380    emit_d32(cbuf,0x20);
2381    // JEQ,s small
2382    emit_opcode(cbuf, 0x74);
2383    emit_d8(cbuf, 0x05);
2384    // MOV    $dst.lo,$dst.hi
2385    emit_opcode( cbuf, 0x8B );
2386    emit_rm(cbuf, 0x3, $dst$$reg, HIGH_FROM_LOW($dst$$reg) );
2387    // SAR    $dst.hi,31
2388    emit_opcode(cbuf, 0xC1);
2389    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW($dst$$reg) );
2390    emit_d8(cbuf, 0x1F );
2391// small:
2392    // SHRD   $dst.lo,$dst.hi,$shift
2393    emit_opcode(cbuf,0x0F);
2394    emit_opcode(cbuf,0xAD);
2395    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg);
2396    // SAR    $dst.hi,$shift"
2397    emit_opcode(cbuf,0xD3);
2398    emit_rm(cbuf, 0x3, 0x7, HIGH_FROM_LOW($dst$$reg) );
2399  %}
2400
2401
2402  // ----------------- Encodings for floating point unit -----------------
2403  // May leave result in FPU-TOS or FPU reg depending on opcodes
2404  enc_class OpcReg_FPR(regFPR src) %{    // FMUL, FDIV
2405    $$$emit8$primary;
2406    emit_rm(cbuf, 0x3, $secondary, $src$$reg );
2407  %}
2408
2409  // Pop argument in FPR0 with FSTP ST(0)
2410  enc_class PopFPU() %{
2411    emit_opcode( cbuf, 0xDD );
2412    emit_d8( cbuf, 0xD8 );
2413  %}
2414
2415  // !!!!! equivalent to Pop_Reg_F
2416  enc_class Pop_Reg_DPR( regDPR dst ) %{
2417    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2418    emit_d8( cbuf, 0xD8+$dst$$reg );
2419  %}
2420
2421  enc_class Push_Reg_DPR( regDPR dst ) %{
2422    emit_opcode( cbuf, 0xD9 );
2423    emit_d8( cbuf, 0xC0-1+$dst$$reg );   // FLD ST(i-1)
2424  %}
2425
2426  enc_class strictfp_bias1( regDPR dst ) %{
2427    emit_opcode( cbuf, 0xDB );           // FLD m80real
2428    emit_opcode( cbuf, 0x2D );
2429    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias1() );
2430    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2431    emit_opcode( cbuf, 0xC8+$dst$$reg );
2432  %}
2433
2434  enc_class strictfp_bias2( regDPR dst ) %{
2435    emit_opcode( cbuf, 0xDB );           // FLD m80real
2436    emit_opcode( cbuf, 0x2D );
2437    emit_d32( cbuf, (int)StubRoutines::addr_fpu_subnormal_bias2() );
2438    emit_opcode( cbuf, 0xDE );           // FMULP ST(dst), ST0
2439    emit_opcode( cbuf, 0xC8+$dst$$reg );
2440  %}
2441
2442  // Special case for moving an integer register to a stack slot.
2443  enc_class OpcPRegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2444    store_to_stackslot( cbuf, $primary, $src$$reg, $dst$$disp );
2445  %}
2446
2447  // Special case for moving a register to a stack slot.
2448  enc_class RegSS( stackSlotI dst, rRegI src ) %{ // RegSS
2449    // Opcode already emitted
2450    emit_rm( cbuf, 0x02, $src$$reg, ESP_enc );   // R/M byte
2451    emit_rm( cbuf, 0x00, ESP_enc, ESP_enc);          // SIB byte
2452    emit_d32(cbuf, $dst$$disp);   // Displacement
2453  %}
2454
2455  // Push the integer in stackSlot 'src' onto FP-stack
2456  enc_class Push_Mem_I( memory src ) %{    // FILD   [ESP+src]
2457    store_to_stackslot( cbuf, $primary, $secondary, $src$$disp );
2458  %}
2459
2460  // Push FPU's TOS float to a stack-slot, and pop FPU-stack
2461  enc_class Pop_Mem_FPR( stackSlotF dst ) %{ // FSTP_S [ESP+dst]
2462    store_to_stackslot( cbuf, 0xD9, 0x03, $dst$$disp );
2463  %}
2464
2465  // Same as Pop_Mem_F except for opcode
2466  // Push FPU's TOS double to a stack-slot, and pop FPU-stack
2467  enc_class Pop_Mem_DPR( stackSlotD dst ) %{ // FSTP_D [ESP+dst]
2468    store_to_stackslot( cbuf, 0xDD, 0x03, $dst$$disp );
2469  %}
2470
2471  enc_class Pop_Reg_FPR( regFPR dst ) %{
2472    emit_opcode( cbuf, 0xDD );           // FSTP   ST(i)
2473    emit_d8( cbuf, 0xD8+$dst$$reg );
2474  %}
2475
2476  enc_class Push_Reg_FPR( regFPR dst ) %{
2477    emit_opcode( cbuf, 0xD9 );           // FLD    ST(i-1)
2478    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2479  %}
2480
2481  // Push FPU's float to a stack-slot, and pop FPU-stack
2482  enc_class Pop_Mem_Reg_FPR( stackSlotF dst, regFPR src ) %{
2483    int pop = 0x02;
2484    if ($src$$reg != FPR1L_enc) {
2485      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2486      emit_d8( cbuf, 0xC0-1+$src$$reg );
2487      pop = 0x03;
2488    }
2489    store_to_stackslot( cbuf, 0xD9, pop, $dst$$disp ); // FST<P>_S  [ESP+dst]
2490  %}
2491
2492  // Push FPU's double to a stack-slot, and pop FPU-stack
2493  enc_class Pop_Mem_Reg_DPR( stackSlotD dst, regDPR src ) %{
2494    int pop = 0x02;
2495    if ($src$$reg != FPR1L_enc) {
2496      emit_opcode( cbuf, 0xD9 );         // FLD    ST(i-1)
2497      emit_d8( cbuf, 0xC0-1+$src$$reg );
2498      pop = 0x03;
2499    }
2500    store_to_stackslot( cbuf, 0xDD, pop, $dst$$disp ); // FST<P>_D  [ESP+dst]
2501  %}
2502
2503  // Push FPU's double to a FPU-stack-slot, and pop FPU-stack
2504  enc_class Pop_Reg_Reg_DPR( regDPR dst, regFPR src ) %{
2505    int pop = 0xD0 - 1; // -1 since we skip FLD
2506    if ($src$$reg != FPR1L_enc) {
2507      emit_opcode( cbuf, 0xD9 );         // FLD    ST(src-1)
2508      emit_d8( cbuf, 0xC0-1+$src$$reg );
2509      pop = 0xD8;
2510    }
2511    emit_opcode( cbuf, 0xDD );
2512    emit_d8( cbuf, pop+$dst$$reg );      // FST<P> ST(i)
2513  %}
2514
2515
2516  enc_class Push_Reg_Mod_DPR( regDPR dst, regDPR src) %{
2517    // load dst in FPR0
2518    emit_opcode( cbuf, 0xD9 );
2519    emit_d8( cbuf, 0xC0-1+$dst$$reg );
2520    if ($src$$reg != FPR1L_enc) {
2521      // fincstp
2522      emit_opcode (cbuf, 0xD9);
2523      emit_opcode (cbuf, 0xF7);
2524      // swap src with FPR1:
2525      // FXCH FPR1 with src
2526      emit_opcode(cbuf, 0xD9);
2527      emit_d8(cbuf, 0xC8-1+$src$$reg );
2528      // fdecstp
2529      emit_opcode (cbuf, 0xD9);
2530      emit_opcode (cbuf, 0xF6);
2531    }
2532  %}
2533
2534  enc_class Push_ModD_encoding(regD src0, regD src1) %{
2535    MacroAssembler _masm(&cbuf);
2536    __ subptr(rsp, 8);
2537    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
2538    __ fld_d(Address(rsp, 0));
2539    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
2540    __ fld_d(Address(rsp, 0));
2541  %}
2542
2543  enc_class Push_ModF_encoding(regF src0, regF src1) %{
2544    MacroAssembler _masm(&cbuf);
2545    __ subptr(rsp, 4);
2546    __ movflt(Address(rsp, 0), $src1$$XMMRegister);
2547    __ fld_s(Address(rsp, 0));
2548    __ movflt(Address(rsp, 0), $src0$$XMMRegister);
2549    __ fld_s(Address(rsp, 0));
2550  %}
2551
2552  enc_class Push_ResultD(regD dst) %{
2553    MacroAssembler _masm(&cbuf);
2554    __ fstp_d(Address(rsp, 0));
2555    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
2556    __ addptr(rsp, 8);
2557  %}
2558
2559  enc_class Push_ResultF(regF dst, immI d8) %{
2560    MacroAssembler _masm(&cbuf);
2561    __ fstp_s(Address(rsp, 0));
2562    __ movflt($dst$$XMMRegister, Address(rsp, 0));
2563    __ addptr(rsp, $d8$$constant);
2564  %}
2565
2566  enc_class Push_SrcD(regD src) %{
2567    MacroAssembler _masm(&cbuf);
2568    __ subptr(rsp, 8);
2569    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2570    __ fld_d(Address(rsp, 0));
2571  %}
2572
2573  enc_class push_stack_temp_qword() %{
2574    MacroAssembler _masm(&cbuf);
2575    __ subptr(rsp, 8);
2576  %}
2577
2578  enc_class pop_stack_temp_qword() %{
2579    MacroAssembler _masm(&cbuf);
2580    __ addptr(rsp, 8);
2581  %}
2582
2583  enc_class push_xmm_to_fpr1(regD src) %{
2584    MacroAssembler _masm(&cbuf);
2585    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
2586    __ fld_d(Address(rsp, 0));
2587  %}
2588
2589  enc_class Push_Result_Mod_DPR( regDPR src) %{
2590    if ($src$$reg != FPR1L_enc) {
2591      // fincstp
2592      emit_opcode (cbuf, 0xD9);
2593      emit_opcode (cbuf, 0xF7);
2594      // FXCH FPR1 with src
2595      emit_opcode(cbuf, 0xD9);
2596      emit_d8(cbuf, 0xC8-1+$src$$reg );
2597      // fdecstp
2598      emit_opcode (cbuf, 0xD9);
2599      emit_opcode (cbuf, 0xF6);
2600    }
2601    // // following asm replaced with Pop_Reg_F or Pop_Mem_F
2602    // // FSTP   FPR$dst$$reg
2603    // emit_opcode( cbuf, 0xDD );
2604    // emit_d8( cbuf, 0xD8+$dst$$reg );
2605  %}
2606
2607  enc_class fnstsw_sahf_skip_parity() %{
2608    // fnstsw ax
2609    emit_opcode( cbuf, 0xDF );
2610    emit_opcode( cbuf, 0xE0 );
2611    // sahf
2612    emit_opcode( cbuf, 0x9E );
2613    // jnp  ::skip
2614    emit_opcode( cbuf, 0x7B );
2615    emit_opcode( cbuf, 0x05 );
2616  %}
2617
2618  enc_class emitModDPR() %{
2619    // fprem must be iterative
2620    // :: loop
2621    // fprem
2622    emit_opcode( cbuf, 0xD9 );
2623    emit_opcode( cbuf, 0xF8 );
2624    // wait
2625    emit_opcode( cbuf, 0x9b );
2626    // fnstsw ax
2627    emit_opcode( cbuf, 0xDF );
2628    emit_opcode( cbuf, 0xE0 );
2629    // sahf
2630    emit_opcode( cbuf, 0x9E );
2631    // jp  ::loop
2632    emit_opcode( cbuf, 0x0F );
2633    emit_opcode( cbuf, 0x8A );
2634    emit_opcode( cbuf, 0xF4 );
2635    emit_opcode( cbuf, 0xFF );
2636    emit_opcode( cbuf, 0xFF );
2637    emit_opcode( cbuf, 0xFF );
2638  %}
2639
2640  enc_class fpu_flags() %{
2641    // fnstsw_ax
2642    emit_opcode( cbuf, 0xDF);
2643    emit_opcode( cbuf, 0xE0);
2644    // test ax,0x0400
2645    emit_opcode( cbuf, 0x66 );   // operand-size prefix for 16-bit immediate
2646    emit_opcode( cbuf, 0xA9 );
2647    emit_d16   ( cbuf, 0x0400 );
2648    // // // This sequence works, but stalls for 12-16 cycles on PPro
2649    // // test rax,0x0400
2650    // emit_opcode( cbuf, 0xA9 );
2651    // emit_d32   ( cbuf, 0x00000400 );
2652    //
2653    // jz exit (no unordered comparison)
2654    emit_opcode( cbuf, 0x74 );
2655    emit_d8    ( cbuf, 0x02 );
2656    // mov ah,1 - treat as LT case (set carry flag)
2657    emit_opcode( cbuf, 0xB4 );
2658    emit_d8    ( cbuf, 0x01 );
2659    // sahf
2660    emit_opcode( cbuf, 0x9E);
2661  %}
2662
2663  enc_class cmpF_P6_fixup() %{
2664    // Fixup the integer flags in case comparison involved a NaN
2665    //
2666    // JNP exit (no unordered comparison, P-flag is set by NaN)
2667    emit_opcode( cbuf, 0x7B );
2668    emit_d8    ( cbuf, 0x03 );
2669    // MOV AH,1 - treat as LT case (set carry flag)
2670    emit_opcode( cbuf, 0xB4 );
2671    emit_d8    ( cbuf, 0x01 );
2672    // SAHF
2673    emit_opcode( cbuf, 0x9E);
2674    // NOP     // target for branch to avoid branch to branch
2675    emit_opcode( cbuf, 0x90);
2676  %}
2677
2678//     fnstsw_ax();
2679//     sahf();
2680//     movl(dst, nan_result);
2681//     jcc(Assembler::parity, exit);
2682//     movl(dst, less_result);
2683//     jcc(Assembler::below, exit);
2684//     movl(dst, equal_result);
2685//     jcc(Assembler::equal, exit);
2686//     movl(dst, greater_result);
2687
2688// less_result     =  1;
2689// greater_result  = -1;
2690// equal_result    = 0;
2691// nan_result      = -1;
2692
2693  enc_class CmpF_Result(rRegI dst) %{
2694    // fnstsw_ax();
2695    emit_opcode( cbuf, 0xDF);
2696    emit_opcode( cbuf, 0xE0);
2697    // sahf
2698    emit_opcode( cbuf, 0x9E);
2699    // movl(dst, nan_result);
2700    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2701    emit_d32( cbuf, -1 );
2702    // jcc(Assembler::parity, exit);
2703    emit_opcode( cbuf, 0x7A );
2704    emit_d8    ( cbuf, 0x13 );
2705    // movl(dst, less_result);
2706    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2707    emit_d32( cbuf, -1 );
2708    // jcc(Assembler::below, exit);
2709    emit_opcode( cbuf, 0x72 );
2710    emit_d8    ( cbuf, 0x0C );
2711    // movl(dst, equal_result);
2712    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2713    emit_d32( cbuf, 0 );
2714    // jcc(Assembler::equal, exit);
2715    emit_opcode( cbuf, 0x74 );
2716    emit_d8    ( cbuf, 0x05 );
2717    // movl(dst, greater_result);
2718    emit_opcode( cbuf, 0xB8 + $dst$$reg);
2719    emit_d32( cbuf, 1 );
2720  %}
2721
2722
2723  // Compare the longs and set flags
2724  // BROKEN!  Do Not use as-is
2725  enc_class cmpl_test( eRegL src1, eRegL src2 ) %{
2726    // CMP    $src1.hi,$src2.hi
2727    emit_opcode( cbuf, 0x3B );
2728    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2729    // JNE,s  done
2730    emit_opcode(cbuf,0x75);
2731    emit_d8(cbuf, 2 );
2732    // CMP    $src1.lo,$src2.lo
2733    emit_opcode( cbuf, 0x3B );
2734    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2735// done:
2736  %}
2737
2738  enc_class convert_int_long( regL dst, rRegI src ) %{
2739    // mov $dst.lo,$src
2740    int dst_encoding = $dst$$reg;
2741    int src_encoding = $src$$reg;
2742    encode_Copy( cbuf, dst_encoding  , src_encoding );
2743    // mov $dst.hi,$src
2744    encode_Copy( cbuf, HIGH_FROM_LOW(dst_encoding), src_encoding );
2745    // sar $dst.hi,31
2746    emit_opcode( cbuf, 0xC1 );
2747    emit_rm(cbuf, 0x3, 7, HIGH_FROM_LOW(dst_encoding) );
2748    emit_d8(cbuf, 0x1F );
2749  %}
2750
2751  enc_class convert_long_double( eRegL src ) %{
2752    // push $src.hi
2753    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2754    // push $src.lo
2755    emit_opcode(cbuf, 0x50+$src$$reg  );
2756    // fild 64-bits at [SP]
2757    emit_opcode(cbuf,0xdf);
2758    emit_d8(cbuf, 0x6C);
2759    emit_d8(cbuf, 0x24);
2760    emit_d8(cbuf, 0x00);
2761    // pop stack
2762    emit_opcode(cbuf, 0x83); // add  SP, #8
2763    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2764    emit_d8(cbuf, 0x8);
2765  %}
2766
2767  enc_class multiply_con_and_shift_high( eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr ) %{
2768    // IMUL   EDX:EAX,$src1
2769    emit_opcode( cbuf, 0xF7 );
2770    emit_rm( cbuf, 0x3, 0x5, $src1$$reg );
2771    // SAR    EDX,$cnt-32
2772    int shift_count = ((int)$cnt$$constant) - 32;
2773    if (shift_count > 0) {
2774      emit_opcode(cbuf, 0xC1);
2775      emit_rm(cbuf, 0x3, 7, $dst$$reg );
2776      emit_d8(cbuf, shift_count);
2777    }
2778  %}
2779
2780  // this version doesn't have add sp, 8
2781  enc_class convert_long_double2( eRegL src ) %{
2782    // push $src.hi
2783    emit_opcode(cbuf, 0x50+HIGH_FROM_LOW($src$$reg));
2784    // push $src.lo
2785    emit_opcode(cbuf, 0x50+$src$$reg  );
2786    // fild 64-bits at [SP]
2787    emit_opcode(cbuf,0xdf);
2788    emit_d8(cbuf, 0x6C);
2789    emit_d8(cbuf, 0x24);
2790    emit_d8(cbuf, 0x00);
2791  %}
2792
2793  enc_class long_int_multiply( eADXRegL dst, nadxRegI src) %{
2794    // Basic idea: long = (long)int * (long)int
2795    // IMUL EDX:EAX, src
2796    emit_opcode( cbuf, 0xF7 );
2797    emit_rm( cbuf, 0x3, 0x5, $src$$reg);
2798  %}
2799
2800  enc_class long_uint_multiply( eADXRegL dst, nadxRegI src) %{
2801    // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
2802    // MUL EDX:EAX, src
2803    emit_opcode( cbuf, 0xF7 );
2804    emit_rm( cbuf, 0x3, 0x4, $src$$reg);
2805  %}
2806
2807  enc_class long_multiply( eADXRegL dst, eRegL src, rRegI tmp ) %{
2808    // Basic idea: lo(result) = lo(x_lo * y_lo)
2809    //             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
2810    // MOV    $tmp,$src.lo
2811    encode_Copy( cbuf, $tmp$$reg, $src$$reg );
2812    // IMUL   $tmp,EDX
2813    emit_opcode( cbuf, 0x0F );
2814    emit_opcode( cbuf, 0xAF );
2815    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2816    // MOV    EDX,$src.hi
2817    encode_Copy( cbuf, HIGH_FROM_LOW($dst$$reg), HIGH_FROM_LOW($src$$reg) );
2818    // IMUL   EDX,EAX
2819    emit_opcode( cbuf, 0x0F );
2820    emit_opcode( cbuf, 0xAF );
2821    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $dst$$reg );
2822    // ADD    $tmp,EDX
2823    emit_opcode( cbuf, 0x03 );
2824    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2825    // MUL   EDX:EAX,$src.lo
2826    emit_opcode( cbuf, 0xF7 );
2827    emit_rm( cbuf, 0x3, 0x4, $src$$reg );
2828    // ADD    EDX,ESI
2829    emit_opcode( cbuf, 0x03 );
2830    emit_rm( cbuf, 0x3, HIGH_FROM_LOW($dst$$reg), $tmp$$reg );
2831  %}
2832
2833  enc_class long_multiply_con( eADXRegL dst, immL_127 src, rRegI tmp ) %{
2834    // Basic idea: lo(result) = lo(src * y_lo)
2835    //             hi(result) = hi(src * y_lo) + lo(src * y_hi)
2836    // IMUL   $tmp,EDX,$src
2837    emit_opcode( cbuf, 0x6B );
2838    emit_rm( cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($dst$$reg) );
2839    emit_d8( cbuf, (int)$src$$constant );
2840    // MOV    EDX,$src
2841    emit_opcode(cbuf, 0xB8 + EDX_enc);
2842    emit_d32( cbuf, (int)$src$$constant );
2843    // MUL   EDX:EAX,EDX
2844    emit_opcode( cbuf, 0xF7 );
2845    emit_rm( cbuf, 0x3, 0x4, EDX_enc );
2846    // ADD    EDX,ESI
2847    emit_opcode( cbuf, 0x03 );
2848    emit_rm( cbuf, 0x3, EDX_enc, $tmp$$reg );
2849  %}
2850
2851  enc_class long_div( eRegL src1, eRegL src2 ) %{
2852    // PUSH src1.hi
2853    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2854    // PUSH src1.lo
2855    emit_opcode(cbuf,               0x50+$src1$$reg  );
2856    // PUSH src2.hi
2857    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2858    // PUSH src2.lo
2859    emit_opcode(cbuf,               0x50+$src2$$reg  );
2860    // CALL directly to the runtime
2861    cbuf.set_insts_mark();
2862    emit_opcode(cbuf,0xE8);       // Call into runtime
2863    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::ldiv) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2864    // Restore stack
2865    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2866    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2867    emit_d8(cbuf, 4*4);
2868  %}
2869
2870  enc_class long_mod( eRegL src1, eRegL src2 ) %{
2871    // PUSH src1.hi
2872    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src1$$reg) );
2873    // PUSH src1.lo
2874    emit_opcode(cbuf,               0x50+$src1$$reg  );
2875    // PUSH src2.hi
2876    emit_opcode(cbuf, HIGH_FROM_LOW(0x50+$src2$$reg) );
2877    // PUSH src2.lo
2878    emit_opcode(cbuf,               0x50+$src2$$reg  );
2879    // CALL directly to the runtime
2880    cbuf.set_insts_mark();
2881    emit_opcode(cbuf,0xE8);       // Call into runtime
2882    emit_d32_reloc(cbuf, (CAST_FROM_FN_PTR(address, SharedRuntime::lrem ) - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
2883    // Restore stack
2884    emit_opcode(cbuf, 0x83); // add  SP, #framesize
2885    emit_rm(cbuf, 0x3, 0x00, ESP_enc);
2886    emit_d8(cbuf, 4*4);
2887  %}
2888
2889  enc_class long_cmp_flags0( eRegL src, rRegI tmp ) %{
2890    // MOV   $tmp,$src.lo
2891    emit_opcode(cbuf, 0x8B);
2892    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg);
2893    // OR    $tmp,$src.hi
2894    emit_opcode(cbuf, 0x0B);
2895    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg));
2896  %}
2897
2898  enc_class long_cmp_flags1( eRegL src1, eRegL src2 ) %{
2899    // CMP    $src1.lo,$src2.lo
2900    emit_opcode( cbuf, 0x3B );
2901    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2902    // JNE,s  skip
2903    emit_cc(cbuf, 0x70, 0x5);
2904    emit_d8(cbuf,2);
2905    // CMP    $src1.hi,$src2.hi
2906    emit_opcode( cbuf, 0x3B );
2907    emit_rm(cbuf, 0x3, HIGH_FROM_LOW($src1$$reg), HIGH_FROM_LOW($src2$$reg) );
2908  %}
2909
2910  enc_class long_cmp_flags2( eRegL src1, eRegL src2, rRegI tmp ) %{
2911    // CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits
2912    emit_opcode( cbuf, 0x3B );
2913    emit_rm(cbuf, 0x3, $src1$$reg, $src2$$reg );
2914    // MOV    $tmp,$src1.hi
2915    emit_opcode( cbuf, 0x8B );
2916    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src1$$reg) );
2917    // SBB   $tmp,$src2.hi\t! Compute flags for long compare
2918    emit_opcode( cbuf, 0x1B );
2919    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src2$$reg) );
2920  %}
2921
2922  enc_class long_cmp_flags3( eRegL src, rRegI tmp ) %{
2923    // XOR    $tmp,$tmp
2924    emit_opcode(cbuf,0x33);  // XOR
2925    emit_rm(cbuf,0x3, $tmp$$reg, $tmp$$reg);
2926    // CMP    $tmp,$src.lo
2927    emit_opcode( cbuf, 0x3B );
2928    emit_rm(cbuf, 0x3, $tmp$$reg, $src$$reg );
2929    // SBB    $tmp,$src.hi
2930    emit_opcode( cbuf, 0x1B );
2931    emit_rm(cbuf, 0x3, $tmp$$reg, HIGH_FROM_LOW($src$$reg) );
2932  %}
2933
2934 // Sniff, sniff... smells like Gnu Superoptimizer
2935  enc_class neg_long( eRegL dst ) %{
2936    emit_opcode(cbuf,0xF7);    // NEG hi
2937    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2938    emit_opcode(cbuf,0xF7);    // NEG lo
2939    emit_rm    (cbuf,0x3, 0x3,               $dst$$reg );
2940    emit_opcode(cbuf,0x83);    // SBB hi,0
2941    emit_rm    (cbuf,0x3, 0x3, HIGH_FROM_LOW($dst$$reg));
2942    emit_d8    (cbuf,0 );
2943  %}
2944
2945  enc_class enc_pop_rdx() %{
2946    emit_opcode(cbuf,0x5A);
2947  %}
2948
2949  enc_class enc_rethrow() %{
2950    cbuf.set_insts_mark();
2951    emit_opcode(cbuf, 0xE9);        // jmp    entry
2952    emit_d32_reloc(cbuf, (int)OptoRuntime::rethrow_stub() - ((int)cbuf.insts_end())-4,
2953                   runtime_call_Relocation::spec(), RELOC_IMM32 );
2954  %}
2955
2956
2957  // Convert a double to an int.  Java semantics require we do complex
2958  // manglelations in the corner cases.  So we set the rounding mode to
2959  // 'zero', store the darned double down as an int, and reset the
2960  // rounding mode to 'nearest'.  The hardware throws an exception which
2961  // patches up the correct value directly to the stack.
2962  enc_class DPR2I_encoding( regDPR src ) %{
2963    // Flip to round-to-zero mode.  We attempted to allow invalid-op
2964    // exceptions here, so that a NAN or other corner-case value will
2965    // thrown an exception (but normal values get converted at full speed).
2966    // However, I2C adapters and other float-stack manglers leave pending
2967    // invalid-op exceptions hanging.  We would have to clear them before
2968    // enabling them and that is more expensive than just testing for the
2969    // invalid value Intel stores down in the corner cases.
2970    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
2971    emit_opcode(cbuf,0x2D);
2972    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
2973    // Allocate a word
2974    emit_opcode(cbuf,0x83);            // SUB ESP,4
2975    emit_opcode(cbuf,0xEC);
2976    emit_d8(cbuf,0x04);
2977    // Encoding assumes a double has been pushed into FPR0.
2978    // Store down the double as an int, popping the FPU stack
2979    emit_opcode(cbuf,0xDB);            // FISTP [ESP]
2980    emit_opcode(cbuf,0x1C);
2981    emit_d8(cbuf,0x24);
2982    // Restore the rounding mode; mask the exception
2983    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
2984    emit_opcode(cbuf,0x2D);
2985    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
2986        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
2987        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
2988
2989    // Load the converted int; adjust CPU stack
2990    emit_opcode(cbuf,0x58);       // POP EAX
2991    emit_opcode(cbuf,0x3D);       // CMP EAX,imm
2992    emit_d32   (cbuf,0x80000000); //         0x80000000
2993    emit_opcode(cbuf,0x75);       // JNE around_slow_call
2994    emit_d8    (cbuf,0x07);       // Size of slow_call
2995    // Push src onto stack slow-path
2996    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
2997    emit_d8    (cbuf,0xC0-1+$src$$reg );
2998    // CALL directly to the runtime
2999    cbuf.set_insts_mark();
3000    emit_opcode(cbuf,0xE8);       // Call into runtime
3001    emit_d32_reloc(cbuf, (StubRoutines::d2i_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3002    // Carry on here...
3003  %}
3004
3005  enc_class DPR2L_encoding( regDPR src ) %{
3006    emit_opcode(cbuf,0xD9);            // FLDCW  trunc
3007    emit_opcode(cbuf,0x2D);
3008    emit_d32(cbuf,(int)StubRoutines::addr_fpu_cntrl_wrd_trunc());
3009    // Allocate a word
3010    emit_opcode(cbuf,0x83);            // SUB ESP,8
3011    emit_opcode(cbuf,0xEC);
3012    emit_d8(cbuf,0x08);
3013    // Encoding assumes a double has been pushed into FPR0.
3014    // Store down the double as a long, popping the FPU stack
3015    emit_opcode(cbuf,0xDF);            // FISTP [ESP]
3016    emit_opcode(cbuf,0x3C);
3017    emit_d8(cbuf,0x24);
3018    // Restore the rounding mode; mask the exception
3019    emit_opcode(cbuf,0xD9);            // FLDCW   std/24-bit mode
3020    emit_opcode(cbuf,0x2D);
3021    emit_d32( cbuf, Compile::current()->in_24_bit_fp_mode()
3022        ? (int)StubRoutines::addr_fpu_cntrl_wrd_24()
3023        : (int)StubRoutines::addr_fpu_cntrl_wrd_std());
3024
3025    // Load the converted int; adjust CPU stack
3026    emit_opcode(cbuf,0x58);       // POP EAX
3027    emit_opcode(cbuf,0x5A);       // POP EDX
3028    emit_opcode(cbuf,0x81);       // CMP EDX,imm
3029    emit_d8    (cbuf,0xFA);       // rdx
3030    emit_d32   (cbuf,0x80000000); //         0x80000000
3031    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3032    emit_d8    (cbuf,0x07+4);     // Size of slow_call
3033    emit_opcode(cbuf,0x85);       // TEST EAX,EAX
3034    emit_opcode(cbuf,0xC0);       // 2/rax,/rax,
3035    emit_opcode(cbuf,0x75);       // JNE around_slow_call
3036    emit_d8    (cbuf,0x07);       // Size of slow_call
3037    // Push src onto stack slow-path
3038    emit_opcode(cbuf,0xD9 );      // FLD     ST(i)
3039    emit_d8    (cbuf,0xC0-1+$src$$reg );
3040    // CALL directly to the runtime
3041    cbuf.set_insts_mark();
3042    emit_opcode(cbuf,0xE8);       // Call into runtime
3043    emit_d32_reloc(cbuf, (StubRoutines::d2l_wrapper() - cbuf.insts_end()) - 4, runtime_call_Relocation::spec(), RELOC_IMM32 );
3044    // Carry on here...
3045  %}
3046
3047  enc_class FMul_ST_reg( eRegFPR src1 ) %{
3048    // Operand was loaded from memory into fp ST (stack top)
3049    // FMUL   ST,$src  /* D8 C8+i */
3050    emit_opcode(cbuf, 0xD8);
3051    emit_opcode(cbuf, 0xC8 + $src1$$reg);
3052  %}
3053
3054  enc_class FAdd_ST_reg( eRegFPR src2 ) %{
3055    // FADDP  ST,src2  /* D8 C0+i */
3056    emit_opcode(cbuf, 0xD8);
3057    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3058    //could use FADDP  src2,fpST  /* DE C0+i */
3059  %}
3060
3061  enc_class FAddP_reg_ST( eRegFPR src2 ) %{
3062    // FADDP  src2,ST  /* DE C0+i */
3063    emit_opcode(cbuf, 0xDE);
3064    emit_opcode(cbuf, 0xC0 + $src2$$reg);
3065  %}
3066
3067  enc_class subFPR_divFPR_encode( eRegFPR src1, eRegFPR src2) %{
3068    // Operand has been loaded into fp ST (stack top)
3069      // FSUB   ST,$src1
3070      emit_opcode(cbuf, 0xD8);
3071      emit_opcode(cbuf, 0xE0 + $src1$$reg);
3072
3073      // FDIV
3074      emit_opcode(cbuf, 0xD8);
3075      emit_opcode(cbuf, 0xF0 + $src2$$reg);
3076  %}
3077
3078  enc_class MulFAddF (eRegFPR src1, eRegFPR src2) %{
3079    // Operand was loaded from memory into fp ST (stack top)
3080    // FADD   ST,$src  /* D8 C0+i */
3081    emit_opcode(cbuf, 0xD8);
3082    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3083
3084    // FMUL  ST,src2  /* D8 C*+i */
3085    emit_opcode(cbuf, 0xD8);
3086    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3087  %}
3088
3089
3090  enc_class MulFAddFreverse (eRegFPR src1, eRegFPR src2) %{
3091    // Operand was loaded from memory into fp ST (stack top)
3092    // FADD   ST,$src  /* D8 C0+i */
3093    emit_opcode(cbuf, 0xD8);
3094    emit_opcode(cbuf, 0xC0 + $src1$$reg);
3095
3096    // FMULP  src2,ST  /* DE C8+i */
3097    emit_opcode(cbuf, 0xDE);
3098    emit_opcode(cbuf, 0xC8 + $src2$$reg);
3099  %}
3100
3101  // Atomically load the volatile long
3102  enc_class enc_loadL_volatile( memory mem, stackSlotL dst ) %{
3103    emit_opcode(cbuf,0xDF);
3104    int rm_byte_opcode = 0x05;
3105    int base     = $mem$$base;
3106    int index    = $mem$$index;
3107    int scale    = $mem$$scale;
3108    int displace = $mem$$disp;
3109    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3110    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3111    store_to_stackslot( cbuf, 0x0DF, 0x07, $dst$$disp );
3112  %}
3113
3114  // Volatile Store Long.  Must be atomic, so move it into
3115  // the FP TOS and then do a 64-bit FIST.  Has to probe the
3116  // target address before the store (for null-ptr checks)
3117  // so the memory operand is used twice in the encoding.
3118  enc_class enc_storeL_volatile( memory mem, stackSlotL src ) %{
3119    store_to_stackslot( cbuf, 0x0DF, 0x05, $src$$disp );
3120    cbuf.set_insts_mark();            // Mark start of FIST in case $mem has an oop
3121    emit_opcode(cbuf,0xDF);
3122    int rm_byte_opcode = 0x07;
3123    int base     = $mem$$base;
3124    int index    = $mem$$index;
3125    int scale    = $mem$$scale;
3126    int displace = $mem$$disp;
3127    relocInfo::relocType disp_reloc = $mem->disp_reloc(); // disp-as-oop when working with static globals
3128    encode_RegMem(cbuf, rm_byte_opcode, base, index, scale, displace, disp_reloc);
3129  %}
3130
3131  // Safepoint Poll.  This polls the safepoint page, and causes an
3132  // exception if it is not readable. Unfortunately, it kills the condition code
3133  // in the process
3134  // We current use TESTL [spp],EDI
3135  // A better choice might be TESTB [spp + pagesize() - CacheLineSize()],0
3136
3137  enc_class Safepoint_Poll() %{
3138    cbuf.relocate(cbuf.insts_mark(), relocInfo::poll_type, 0);
3139    emit_opcode(cbuf,0x85);
3140    emit_rm (cbuf, 0x0, 0x7, 0x5);
3141    emit_d32(cbuf, (intptr_t)os::get_polling_page());
3142  %}
3143%}
3144
3145
3146//----------FRAME--------------------------------------------------------------
3147// Definition of frame structure and management information.
3148//
3149//  S T A C K   L A Y O U T    Allocators stack-slot number
3150//                             |   (to get allocators register number
3151//  G  Owned by    |        |  v    add OptoReg::stack0())
3152//  r   CALLER     |        |
3153//  o     |        +--------+      pad to even-align allocators stack-slot
3154//  w     V        |  pad0  |        numbers; owned by CALLER
3155//  t   -----------+--------+----> Matcher::_in_arg_limit, unaligned
3156//  h     ^        |   in   |  5
3157//        |        |  args  |  4   Holes in incoming args owned by SELF
3158//  |     |        |        |  3
3159//  |     |        +--------+
3160//  V     |        | old out|      Empty on Intel, window on Sparc
3161//        |    old |preserve|      Must be even aligned.
3162//        |     SP-+--------+----> Matcher::_old_SP, even aligned
3163//        |        |   in   |  3   area for Intel ret address
3164//     Owned by    |preserve|      Empty on Sparc.
3165//       SELF      +--------+
3166//        |        |  pad2  |  2   pad to align old SP
3167//        |        +--------+  1
3168//        |        | locks  |  0
3169//        |        +--------+----> OptoReg::stack0(), even aligned
3170//        |        |  pad1  | 11   pad to align new SP
3171//        |        +--------+
3172//        |        |        | 10
3173//        |        | spills |  9   spills
3174//        V        |        |  8   (pad0 slot for callee)
3175//      -----------+--------+----> Matcher::_out_arg_limit, unaligned
3176//        ^        |  out   |  7
3177//        |        |  args  |  6   Holes in outgoing args owned by CALLEE
3178//     Owned by    +--------+
3179//      CALLEE     | new out|  6   Empty on Intel, window on Sparc
3180//        |    new |preserve|      Must be even-aligned.
3181//        |     SP-+--------+----> Matcher::_new_SP, even aligned
3182//        |        |        |
3183//
3184// Note 1: Only region 8-11 is determined by the allocator.  Region 0-5 is
3185//         known from SELF's arguments and the Java calling convention.
3186//         Region 6-7 is determined per call site.
3187// Note 2: If the calling convention leaves holes in the incoming argument
3188//         area, those holes are owned by SELF.  Holes in the outgoing area
3189//         are owned by the CALLEE.  Holes should not be nessecary in the
3190//         incoming area, as the Java calling convention is completely under
3191//         the control of the AD file.  Doubles can be sorted and packed to
3192//         avoid holes.  Holes in the outgoing arguments may be nessecary for
3193//         varargs C calling conventions.
3194// Note 3: Region 0-3 is even aligned, with pad2 as needed.  Region 3-5 is
3195//         even aligned with pad0 as needed.
3196//         Region 6 is even aligned.  Region 6-7 is NOT even aligned;
3197//         region 6-11 is even aligned; it may be padded out more so that
3198//         the region from SP to FP meets the minimum stack alignment.
3199
3200frame %{
3201  // What direction does stack grow in (assumed to be same for C & Java)
3202  stack_direction(TOWARDS_LOW);
3203
3204  // These three registers define part of the calling convention
3205  // between compiled code and the interpreter.
3206  inline_cache_reg(EAX);                // Inline Cache Register
3207  interpreter_method_oop_reg(EBX);      // Method Oop Register when calling interpreter
3208
3209  // Optional: name the operand used by cisc-spilling to access [stack_pointer + offset]
3210  cisc_spilling_operand_name(indOffset32);
3211
3212  // Number of stack slots consumed by locking an object
3213  sync_stack_slots(1);
3214
3215  // Compiled code's Frame Pointer
3216  frame_pointer(ESP);
3217  // Interpreter stores its frame pointer in a register which is
3218  // stored to the stack by I2CAdaptors.
3219  // I2CAdaptors convert from interpreted java to compiled java.
3220  interpreter_frame_pointer(EBP);
3221
3222  // Stack alignment requirement
3223  // Alignment size in bytes (128-bit -> 16 bytes)
3224  stack_alignment(StackAlignmentInBytes);
3225
3226  // Number of stack slots between incoming argument block and the start of
3227  // a new frame.  The PROLOG must add this many slots to the stack.  The
3228  // EPILOG must remove this many slots.  Intel needs one slot for
3229  // return address and one for rbp, (must save rbp)
3230  in_preserve_stack_slots(2+VerifyStackAtCalls);
3231
3232  // Number of outgoing stack slots killed above the out_preserve_stack_slots
3233  // for calls to C.  Supports the var-args backing area for register parms.
3234  varargs_C_out_slots_killed(0);
3235
3236  // The after-PROLOG location of the return address.  Location of
3237  // return address specifies a type (REG or STACK) and a number
3238  // representing the register number (i.e. - use a register name) or
3239  // stack slot.
3240  // Ret Addr is on stack in slot 0 if no locks or verification or alignment.
3241  // Otherwise, it is above the locks and verification slot and alignment word
3242  return_addr(STACK - 1 +
3243              round_to((Compile::current()->in_preserve_stack_slots() +
3244                        Compile::current()->fixed_slots()),
3245                       stack_alignment_in_slots()));
3246
3247  // Body of function which returns an integer array locating
3248  // arguments either in registers or in stack slots.  Passed an array
3249  // of ideal registers called "sig" and a "length" count.  Stack-slot
3250  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3251  // arguments for a CALLEE.  Incoming stack arguments are
3252  // automatically biased by the preserve_stack_slots field above.
3253  calling_convention %{
3254    // No difference between ingoing/outgoing just pass false
3255    SharedRuntime::java_calling_convention(sig_bt, regs, length, false);
3256  %}
3257
3258
3259  // Body of function which returns an integer array locating
3260  // arguments either in registers or in stack slots.  Passed an array
3261  // of ideal registers called "sig" and a "length" count.  Stack-slot
3262  // offsets are based on outgoing arguments, i.e. a CALLER setting up
3263  // arguments for a CALLEE.  Incoming stack arguments are
3264  // automatically biased by the preserve_stack_slots field above.
3265  c_calling_convention %{
3266    // This is obviously always outgoing
3267    (void) SharedRuntime::c_calling_convention(sig_bt, regs, /*regs2=*/NULL, length);
3268  %}
3269
3270  // Location of C & interpreter return values
3271  c_return_value %{
3272    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3273    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3274    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3275
3276    // in SSE2+ mode we want to keep the FPU stack clean so pretend
3277    // that C functions return float and double results in XMM0.
3278    if( ideal_reg == Op_RegD && UseSSE>=2 )
3279      return OptoRegPair(XMM0b_num,XMM0_num);
3280    if( ideal_reg == Op_RegF && UseSSE>=2 )
3281      return OptoRegPair(OptoReg::Bad,XMM0_num);
3282
3283    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3284  %}
3285
3286  // Location of return values
3287  return_value %{
3288    assert( ideal_reg >= Op_RegI && ideal_reg <= Op_RegL, "only return normal values" );
3289    static int lo[Op_RegL+1] = { 0, 0, OptoReg::Bad, EAX_num,      EAX_num,      FPR1L_num,    FPR1L_num, EAX_num };
3290    static int hi[Op_RegL+1] = { 0, 0, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, OptoReg::Bad, FPR1H_num, EDX_num };
3291    if( ideal_reg == Op_RegD && UseSSE>=2 )
3292      return OptoRegPair(XMM0b_num,XMM0_num);
3293    if( ideal_reg == Op_RegF && UseSSE>=1 )
3294      return OptoRegPair(OptoReg::Bad,XMM0_num);
3295    return OptoRegPair(hi[ideal_reg],lo[ideal_reg]);
3296  %}
3297
3298%}
3299
3300//----------ATTRIBUTES---------------------------------------------------------
3301//----------Operand Attributes-------------------------------------------------
3302op_attrib op_cost(0);        // Required cost attribute
3303
3304//----------Instruction Attributes---------------------------------------------
3305ins_attrib ins_cost(100);       // Required cost attribute
3306ins_attrib ins_size(8);         // Required size attribute (in bits)
3307ins_attrib ins_short_branch(0); // Required flag: is this instruction a
3308                                // non-matching short branch variant of some
3309                                                            // long branch?
3310ins_attrib ins_alignment(1);    // Required alignment attribute (must be a power of 2)
3311                                // specifies the alignment that some part of the instruction (not
3312                                // necessarily the start) requires.  If > 1, a compute_padding()
3313                                // function must be provided for the instruction
3314
3315//----------OPERANDS-----------------------------------------------------------
3316// Operand definitions must precede instruction definitions for correct parsing
3317// in the ADLC because operands constitute user defined types which are used in
3318// instruction definitions.
3319
3320//----------Simple Operands----------------------------------------------------
3321// Immediate Operands
3322// Integer Immediate
3323operand immI() %{
3324  match(ConI);
3325
3326  op_cost(10);
3327  format %{ %}
3328  interface(CONST_INTER);
3329%}
3330
3331// Constant for test vs zero
3332operand immI0() %{
3333  predicate(n->get_int() == 0);
3334  match(ConI);
3335
3336  op_cost(0);
3337  format %{ %}
3338  interface(CONST_INTER);
3339%}
3340
3341// Constant for increment
3342operand immI1() %{
3343  predicate(n->get_int() == 1);
3344  match(ConI);
3345
3346  op_cost(0);
3347  format %{ %}
3348  interface(CONST_INTER);
3349%}
3350
3351// Constant for decrement
3352operand immI_M1() %{
3353  predicate(n->get_int() == -1);
3354  match(ConI);
3355
3356  op_cost(0);
3357  format %{ %}
3358  interface(CONST_INTER);
3359%}
3360
3361// Valid scale values for addressing modes
3362operand immI2() %{
3363  predicate(0 <= n->get_int() && (n->get_int() <= 3));
3364  match(ConI);
3365
3366  format %{ %}
3367  interface(CONST_INTER);
3368%}
3369
3370operand immI8() %{
3371  predicate((-128 <= n->get_int()) && (n->get_int() <= 127));
3372  match(ConI);
3373
3374  op_cost(5);
3375  format %{ %}
3376  interface(CONST_INTER);
3377%}
3378
3379operand immI16() %{
3380  predicate((-32768 <= n->get_int()) && (n->get_int() <= 32767));
3381  match(ConI);
3382
3383  op_cost(10);
3384  format %{ %}
3385  interface(CONST_INTER);
3386%}
3387
3388// Int Immediate non-negative
3389operand immU31()
3390%{
3391  predicate(n->get_int() >= 0);
3392  match(ConI);
3393
3394  op_cost(0);
3395  format %{ %}
3396  interface(CONST_INTER);
3397%}
3398
3399// Constant for long shifts
3400operand immI_32() %{
3401  predicate( n->get_int() == 32 );
3402  match(ConI);
3403
3404  op_cost(0);
3405  format %{ %}
3406  interface(CONST_INTER);
3407%}
3408
3409operand immI_1_31() %{
3410  predicate( n->get_int() >= 1 && n->get_int() <= 31 );
3411  match(ConI);
3412
3413  op_cost(0);
3414  format %{ %}
3415  interface(CONST_INTER);
3416%}
3417
3418operand immI_32_63() %{
3419  predicate( n->get_int() >= 32 && n->get_int() <= 63 );
3420  match(ConI);
3421  op_cost(0);
3422
3423  format %{ %}
3424  interface(CONST_INTER);
3425%}
3426
3427operand immI_1() %{
3428  predicate( n->get_int() == 1 );
3429  match(ConI);
3430
3431  op_cost(0);
3432  format %{ %}
3433  interface(CONST_INTER);
3434%}
3435
3436operand immI_2() %{
3437  predicate( n->get_int() == 2 );
3438  match(ConI);
3439
3440  op_cost(0);
3441  format %{ %}
3442  interface(CONST_INTER);
3443%}
3444
3445operand immI_3() %{
3446  predicate( n->get_int() == 3 );
3447  match(ConI);
3448
3449  op_cost(0);
3450  format %{ %}
3451  interface(CONST_INTER);
3452%}
3453
3454// Pointer Immediate
3455operand immP() %{
3456  match(ConP);
3457
3458  op_cost(10);
3459  format %{ %}
3460  interface(CONST_INTER);
3461%}
3462
3463// NULL Pointer Immediate
3464operand immP0() %{
3465  predicate( n->get_ptr() == 0 );
3466  match(ConP);
3467  op_cost(0);
3468
3469  format %{ %}
3470  interface(CONST_INTER);
3471%}
3472
3473// Long Immediate
3474operand immL() %{
3475  match(ConL);
3476
3477  op_cost(20);
3478  format %{ %}
3479  interface(CONST_INTER);
3480%}
3481
3482// Long Immediate zero
3483operand immL0() %{
3484  predicate( n->get_long() == 0L );
3485  match(ConL);
3486  op_cost(0);
3487
3488  format %{ %}
3489  interface(CONST_INTER);
3490%}
3491
3492// Long Immediate zero
3493operand immL_M1() %{
3494  predicate( n->get_long() == -1L );
3495  match(ConL);
3496  op_cost(0);
3497
3498  format %{ %}
3499  interface(CONST_INTER);
3500%}
3501
3502// Long immediate from 0 to 127.
3503// Used for a shorter form of long mul by 10.
3504operand immL_127() %{
3505  predicate((0 <= n->get_long()) && (n->get_long() <= 127));
3506  match(ConL);
3507  op_cost(0);
3508
3509  format %{ %}
3510  interface(CONST_INTER);
3511%}
3512
3513// Long Immediate: low 32-bit mask
3514operand immL_32bits() %{
3515  predicate(n->get_long() == 0xFFFFFFFFL);
3516  match(ConL);
3517  op_cost(0);
3518
3519  format %{ %}
3520  interface(CONST_INTER);
3521%}
3522
3523// Long Immediate: low 32-bit mask
3524operand immL32() %{
3525  predicate(n->get_long() == (int)(n->get_long()));
3526  match(ConL);
3527  op_cost(20);
3528
3529  format %{ %}
3530  interface(CONST_INTER);
3531%}
3532
3533//Double Immediate zero
3534operand immDPR0() %{
3535  // Do additional (and counter-intuitive) test against NaN to work around VC++
3536  // bug that generates code such that NaNs compare equal to 0.0
3537  predicate( UseSSE<=1 && n->getd() == 0.0 && !g_isnan(n->getd()) );
3538  match(ConD);
3539
3540  op_cost(5);
3541  format %{ %}
3542  interface(CONST_INTER);
3543%}
3544
3545// Double Immediate one
3546operand immDPR1() %{
3547  predicate( UseSSE<=1 && n->getd() == 1.0 );
3548  match(ConD);
3549
3550  op_cost(5);
3551  format %{ %}
3552  interface(CONST_INTER);
3553%}
3554
3555// Double Immediate
3556operand immDPR() %{
3557  predicate(UseSSE<=1);
3558  match(ConD);
3559
3560  op_cost(5);
3561  format %{ %}
3562  interface(CONST_INTER);
3563%}
3564
3565operand immD() %{
3566  predicate(UseSSE>=2);
3567  match(ConD);
3568
3569  op_cost(5);
3570  format %{ %}
3571  interface(CONST_INTER);
3572%}
3573
3574// Double Immediate zero
3575operand immD0() %{
3576  // Do additional (and counter-intuitive) test against NaN to work around VC++
3577  // bug that generates code such that NaNs compare equal to 0.0 AND do not
3578  // compare equal to -0.0.
3579  predicate( UseSSE>=2 && jlong_cast(n->getd()) == 0 );
3580  match(ConD);
3581
3582  format %{ %}
3583  interface(CONST_INTER);
3584%}
3585
3586// Float Immediate zero
3587operand immFPR0() %{
3588  predicate(UseSSE == 0 && n->getf() == 0.0F);
3589  match(ConF);
3590
3591  op_cost(5);
3592  format %{ %}
3593  interface(CONST_INTER);
3594%}
3595
3596// Float Immediate one
3597operand immFPR1() %{
3598  predicate(UseSSE == 0 && n->getf() == 1.0F);
3599  match(ConF);
3600
3601  op_cost(5);
3602  format %{ %}
3603  interface(CONST_INTER);
3604%}
3605
3606// Float Immediate
3607operand immFPR() %{
3608  predicate( UseSSE == 0 );
3609  match(ConF);
3610
3611  op_cost(5);
3612  format %{ %}
3613  interface(CONST_INTER);
3614%}
3615
3616// Float Immediate
3617operand immF() %{
3618  predicate(UseSSE >= 1);
3619  match(ConF);
3620
3621  op_cost(5);
3622  format %{ %}
3623  interface(CONST_INTER);
3624%}
3625
3626// Float Immediate zero.  Zero and not -0.0
3627operand immF0() %{
3628  predicate( UseSSE >= 1 && jint_cast(n->getf()) == 0 );
3629  match(ConF);
3630
3631  op_cost(5);
3632  format %{ %}
3633  interface(CONST_INTER);
3634%}
3635
3636// Immediates for special shifts (sign extend)
3637
3638// Constants for increment
3639operand immI_16() %{
3640  predicate( n->get_int() == 16 );
3641  match(ConI);
3642
3643  format %{ %}
3644  interface(CONST_INTER);
3645%}
3646
3647operand immI_24() %{
3648  predicate( n->get_int() == 24 );
3649  match(ConI);
3650
3651  format %{ %}
3652  interface(CONST_INTER);
3653%}
3654
3655// Constant for byte-wide masking
3656operand immI_255() %{
3657  predicate( n->get_int() == 255 );
3658  match(ConI);
3659
3660  format %{ %}
3661  interface(CONST_INTER);
3662%}
3663
3664// Constant for short-wide masking
3665operand immI_65535() %{
3666  predicate(n->get_int() == 65535);
3667  match(ConI);
3668
3669  format %{ %}
3670  interface(CONST_INTER);
3671%}
3672
3673// Register Operands
3674// Integer Register
3675operand rRegI() %{
3676  constraint(ALLOC_IN_RC(int_reg));
3677  match(RegI);
3678  match(xRegI);
3679  match(eAXRegI);
3680  match(eBXRegI);
3681  match(eCXRegI);
3682  match(eDXRegI);
3683  match(eDIRegI);
3684  match(eSIRegI);
3685
3686  format %{ %}
3687  interface(REG_INTER);
3688%}
3689
3690// Subset of Integer Register
3691operand xRegI(rRegI reg) %{
3692  constraint(ALLOC_IN_RC(int_x_reg));
3693  match(reg);
3694  match(eAXRegI);
3695  match(eBXRegI);
3696  match(eCXRegI);
3697  match(eDXRegI);
3698
3699  format %{ %}
3700  interface(REG_INTER);
3701%}
3702
3703// Special Registers
3704operand eAXRegI(xRegI reg) %{
3705  constraint(ALLOC_IN_RC(eax_reg));
3706  match(reg);
3707  match(rRegI);
3708
3709  format %{ "EAX" %}
3710  interface(REG_INTER);
3711%}
3712
3713// Special Registers
3714operand eBXRegI(xRegI reg) %{
3715  constraint(ALLOC_IN_RC(ebx_reg));
3716  match(reg);
3717  match(rRegI);
3718
3719  format %{ "EBX" %}
3720  interface(REG_INTER);
3721%}
3722
3723operand eCXRegI(xRegI reg) %{
3724  constraint(ALLOC_IN_RC(ecx_reg));
3725  match(reg);
3726  match(rRegI);
3727
3728  format %{ "ECX" %}
3729  interface(REG_INTER);
3730%}
3731
3732operand eDXRegI(xRegI reg) %{
3733  constraint(ALLOC_IN_RC(edx_reg));
3734  match(reg);
3735  match(rRegI);
3736
3737  format %{ "EDX" %}
3738  interface(REG_INTER);
3739%}
3740
3741operand eDIRegI(xRegI reg) %{
3742  constraint(ALLOC_IN_RC(edi_reg));
3743  match(reg);
3744  match(rRegI);
3745
3746  format %{ "EDI" %}
3747  interface(REG_INTER);
3748%}
3749
3750operand naxRegI() %{
3751  constraint(ALLOC_IN_RC(nax_reg));
3752  match(RegI);
3753  match(eCXRegI);
3754  match(eDXRegI);
3755  match(eSIRegI);
3756  match(eDIRegI);
3757
3758  format %{ %}
3759  interface(REG_INTER);
3760%}
3761
3762operand nadxRegI() %{
3763  constraint(ALLOC_IN_RC(nadx_reg));
3764  match(RegI);
3765  match(eBXRegI);
3766  match(eCXRegI);
3767  match(eSIRegI);
3768  match(eDIRegI);
3769
3770  format %{ %}
3771  interface(REG_INTER);
3772%}
3773
3774operand ncxRegI() %{
3775  constraint(ALLOC_IN_RC(ncx_reg));
3776  match(RegI);
3777  match(eAXRegI);
3778  match(eDXRegI);
3779  match(eSIRegI);
3780  match(eDIRegI);
3781
3782  format %{ %}
3783  interface(REG_INTER);
3784%}
3785
3786// // This operand was used by cmpFastUnlock, but conflicted with 'object' reg
3787// //
3788operand eSIRegI(xRegI reg) %{
3789   constraint(ALLOC_IN_RC(esi_reg));
3790   match(reg);
3791   match(rRegI);
3792
3793   format %{ "ESI" %}
3794   interface(REG_INTER);
3795%}
3796
3797// Pointer Register
3798operand anyRegP() %{
3799  constraint(ALLOC_IN_RC(any_reg));
3800  match(RegP);
3801  match(eAXRegP);
3802  match(eBXRegP);
3803  match(eCXRegP);
3804  match(eDIRegP);
3805  match(eRegP);
3806
3807  format %{ %}
3808  interface(REG_INTER);
3809%}
3810
3811operand eRegP() %{
3812  constraint(ALLOC_IN_RC(int_reg));
3813  match(RegP);
3814  match(eAXRegP);
3815  match(eBXRegP);
3816  match(eCXRegP);
3817  match(eDIRegP);
3818
3819  format %{ %}
3820  interface(REG_INTER);
3821%}
3822
3823// On windows95, EBP is not safe to use for implicit null tests.
3824operand eRegP_no_EBP() %{
3825  constraint(ALLOC_IN_RC(int_reg_no_ebp));
3826  match(RegP);
3827  match(eAXRegP);
3828  match(eBXRegP);
3829  match(eCXRegP);
3830  match(eDIRegP);
3831
3832  op_cost(100);
3833  format %{ %}
3834  interface(REG_INTER);
3835%}
3836
3837operand naxRegP() %{
3838  constraint(ALLOC_IN_RC(nax_reg));
3839  match(RegP);
3840  match(eBXRegP);
3841  match(eDXRegP);
3842  match(eCXRegP);
3843  match(eSIRegP);
3844  match(eDIRegP);
3845
3846  format %{ %}
3847  interface(REG_INTER);
3848%}
3849
3850operand nabxRegP() %{
3851  constraint(ALLOC_IN_RC(nabx_reg));
3852  match(RegP);
3853  match(eCXRegP);
3854  match(eDXRegP);
3855  match(eSIRegP);
3856  match(eDIRegP);
3857
3858  format %{ %}
3859  interface(REG_INTER);
3860%}
3861
3862operand pRegP() %{
3863  constraint(ALLOC_IN_RC(p_reg));
3864  match(RegP);
3865  match(eBXRegP);
3866  match(eDXRegP);
3867  match(eSIRegP);
3868  match(eDIRegP);
3869
3870  format %{ %}
3871  interface(REG_INTER);
3872%}
3873
3874// Special Registers
3875// Return a pointer value
3876operand eAXRegP(eRegP reg) %{
3877  constraint(ALLOC_IN_RC(eax_reg));
3878  match(reg);
3879  format %{ "EAX" %}
3880  interface(REG_INTER);
3881%}
3882
3883// Used in AtomicAdd
3884operand eBXRegP(eRegP reg) %{
3885  constraint(ALLOC_IN_RC(ebx_reg));
3886  match(reg);
3887  format %{ "EBX" %}
3888  interface(REG_INTER);
3889%}
3890
3891// Tail-call (interprocedural jump) to interpreter
3892operand eCXRegP(eRegP reg) %{
3893  constraint(ALLOC_IN_RC(ecx_reg));
3894  match(reg);
3895  format %{ "ECX" %}
3896  interface(REG_INTER);
3897%}
3898
3899operand eSIRegP(eRegP reg) %{
3900  constraint(ALLOC_IN_RC(esi_reg));
3901  match(reg);
3902  format %{ "ESI" %}
3903  interface(REG_INTER);
3904%}
3905
3906// Used in rep stosw
3907operand eDIRegP(eRegP reg) %{
3908  constraint(ALLOC_IN_RC(edi_reg));
3909  match(reg);
3910  format %{ "EDI" %}
3911  interface(REG_INTER);
3912%}
3913
3914operand eRegL() %{
3915  constraint(ALLOC_IN_RC(long_reg));
3916  match(RegL);
3917  match(eADXRegL);
3918
3919  format %{ %}
3920  interface(REG_INTER);
3921%}
3922
3923operand eADXRegL( eRegL reg ) %{
3924  constraint(ALLOC_IN_RC(eadx_reg));
3925  match(reg);
3926
3927  format %{ "EDX:EAX" %}
3928  interface(REG_INTER);
3929%}
3930
3931operand eBCXRegL( eRegL reg ) %{
3932  constraint(ALLOC_IN_RC(ebcx_reg));
3933  match(reg);
3934
3935  format %{ "EBX:ECX" %}
3936  interface(REG_INTER);
3937%}
3938
3939// Special case for integer high multiply
3940operand eADXRegL_low_only() %{
3941  constraint(ALLOC_IN_RC(eadx_reg));
3942  match(RegL);
3943
3944  format %{ "EAX" %}
3945  interface(REG_INTER);
3946%}
3947
3948// Flags register, used as output of compare instructions
3949operand eFlagsReg() %{
3950  constraint(ALLOC_IN_RC(int_flags));
3951  match(RegFlags);
3952
3953  format %{ "EFLAGS" %}
3954  interface(REG_INTER);
3955%}
3956
3957// Flags register, used as output of FLOATING POINT compare instructions
3958operand eFlagsRegU() %{
3959  constraint(ALLOC_IN_RC(int_flags));
3960  match(RegFlags);
3961
3962  format %{ "EFLAGS_U" %}
3963  interface(REG_INTER);
3964%}
3965
3966operand eFlagsRegUCF() %{
3967  constraint(ALLOC_IN_RC(int_flags));
3968  match(RegFlags);
3969  predicate(false);
3970
3971  format %{ "EFLAGS_U_CF" %}
3972  interface(REG_INTER);
3973%}
3974
3975// Condition Code Register used by long compare
3976operand flagsReg_long_LTGE() %{
3977  constraint(ALLOC_IN_RC(int_flags));
3978  match(RegFlags);
3979  format %{ "FLAGS_LTGE" %}
3980  interface(REG_INTER);
3981%}
3982operand flagsReg_long_EQNE() %{
3983  constraint(ALLOC_IN_RC(int_flags));
3984  match(RegFlags);
3985  format %{ "FLAGS_EQNE" %}
3986  interface(REG_INTER);
3987%}
3988operand flagsReg_long_LEGT() %{
3989  constraint(ALLOC_IN_RC(int_flags));
3990  match(RegFlags);
3991  format %{ "FLAGS_LEGT" %}
3992  interface(REG_INTER);
3993%}
3994
3995// Float register operands
3996operand regDPR() %{
3997  predicate( UseSSE < 2 );
3998  constraint(ALLOC_IN_RC(fp_dbl_reg));
3999  match(RegD);
4000  match(regDPR1);
4001  match(regDPR2);
4002  format %{ %}
4003  interface(REG_INTER);
4004%}
4005
4006operand regDPR1(regDPR reg) %{
4007  predicate( UseSSE < 2 );
4008  constraint(ALLOC_IN_RC(fp_dbl_reg0));
4009  match(reg);
4010  format %{ "FPR1" %}
4011  interface(REG_INTER);
4012%}
4013
4014operand regDPR2(regDPR reg) %{
4015  predicate( UseSSE < 2 );
4016  constraint(ALLOC_IN_RC(fp_dbl_reg1));
4017  match(reg);
4018  format %{ "FPR2" %}
4019  interface(REG_INTER);
4020%}
4021
4022operand regnotDPR1(regDPR reg) %{
4023  predicate( UseSSE < 2 );
4024  constraint(ALLOC_IN_RC(fp_dbl_notreg0));
4025  match(reg);
4026  format %{ %}
4027  interface(REG_INTER);
4028%}
4029
4030// Float register operands
4031operand regFPR() %{
4032  predicate( UseSSE < 2 );
4033  constraint(ALLOC_IN_RC(fp_flt_reg));
4034  match(RegF);
4035  match(regFPR1);
4036  format %{ %}
4037  interface(REG_INTER);
4038%}
4039
4040// Float register operands
4041operand regFPR1(regFPR reg) %{
4042  predicate( UseSSE < 2 );
4043  constraint(ALLOC_IN_RC(fp_flt_reg0));
4044  match(reg);
4045  format %{ "FPR1" %}
4046  interface(REG_INTER);
4047%}
4048
4049// XMM Float register operands
4050operand regF() %{
4051  predicate( UseSSE>=1 );
4052  constraint(ALLOC_IN_RC(float_reg_legacy));
4053  match(RegF);
4054  format %{ %}
4055  interface(REG_INTER);
4056%}
4057
4058// XMM Double register operands
4059operand regD() %{
4060  predicate( UseSSE>=2 );
4061  constraint(ALLOC_IN_RC(double_reg_legacy));
4062  match(RegD);
4063  format %{ %}
4064  interface(REG_INTER);
4065%}
4066
4067// Vectors : note, we use legacy registers to avoid extra (unneeded in 32-bit VM)
4068// runtime code generation via reg_class_dynamic.
4069operand vecS() %{
4070  constraint(ALLOC_IN_RC(vectors_reg_legacy));
4071  match(VecS);
4072
4073  format %{ %}
4074  interface(REG_INTER);
4075%}
4076
4077operand vecD() %{
4078  constraint(ALLOC_IN_RC(vectord_reg_legacy));
4079  match(VecD);
4080
4081  format %{ %}
4082  interface(REG_INTER);
4083%}
4084
4085operand vecX() %{
4086  constraint(ALLOC_IN_RC(vectorx_reg_legacy));
4087  match(VecX);
4088
4089  format %{ %}
4090  interface(REG_INTER);
4091%}
4092
4093operand vecY() %{
4094  constraint(ALLOC_IN_RC(vectory_reg_legacy));
4095  match(VecY);
4096
4097  format %{ %}
4098  interface(REG_INTER);
4099%}
4100
4101//----------Memory Operands----------------------------------------------------
4102// Direct Memory Operand
4103operand direct(immP addr) %{
4104  match(addr);
4105
4106  format %{ "[$addr]" %}
4107  interface(MEMORY_INTER) %{
4108    base(0xFFFFFFFF);
4109    index(0x4);
4110    scale(0x0);
4111    disp($addr);
4112  %}
4113%}
4114
4115// Indirect Memory Operand
4116operand indirect(eRegP reg) %{
4117  constraint(ALLOC_IN_RC(int_reg));
4118  match(reg);
4119
4120  format %{ "[$reg]" %}
4121  interface(MEMORY_INTER) %{
4122    base($reg);
4123    index(0x4);
4124    scale(0x0);
4125    disp(0x0);
4126  %}
4127%}
4128
4129// Indirect Memory Plus Short Offset Operand
4130operand indOffset8(eRegP reg, immI8 off) %{
4131  match(AddP reg off);
4132
4133  format %{ "[$reg + $off]" %}
4134  interface(MEMORY_INTER) %{
4135    base($reg);
4136    index(0x4);
4137    scale(0x0);
4138    disp($off);
4139  %}
4140%}
4141
4142// Indirect Memory Plus Long Offset Operand
4143operand indOffset32(eRegP reg, immI off) %{
4144  match(AddP reg off);
4145
4146  format %{ "[$reg + $off]" %}
4147  interface(MEMORY_INTER) %{
4148    base($reg);
4149    index(0x4);
4150    scale(0x0);
4151    disp($off);
4152  %}
4153%}
4154
4155// Indirect Memory Plus Long Offset Operand
4156operand indOffset32X(rRegI reg, immP off) %{
4157  match(AddP off reg);
4158
4159  format %{ "[$reg + $off]" %}
4160  interface(MEMORY_INTER) %{
4161    base($reg);
4162    index(0x4);
4163    scale(0x0);
4164    disp($off);
4165  %}
4166%}
4167
4168// Indirect Memory Plus Index Register Plus Offset Operand
4169operand indIndexOffset(eRegP reg, rRegI ireg, immI off) %{
4170  match(AddP (AddP reg ireg) off);
4171
4172  op_cost(10);
4173  format %{"[$reg + $off + $ireg]" %}
4174  interface(MEMORY_INTER) %{
4175    base($reg);
4176    index($ireg);
4177    scale(0x0);
4178    disp($off);
4179  %}
4180%}
4181
4182// Indirect Memory Plus Index Register Plus Offset Operand
4183operand indIndex(eRegP reg, rRegI ireg) %{
4184  match(AddP reg ireg);
4185
4186  op_cost(10);
4187  format %{"[$reg + $ireg]" %}
4188  interface(MEMORY_INTER) %{
4189    base($reg);
4190    index($ireg);
4191    scale(0x0);
4192    disp(0x0);
4193  %}
4194%}
4195
4196// // -------------------------------------------------------------------------
4197// // 486 architecture doesn't support "scale * index + offset" with out a base
4198// // -------------------------------------------------------------------------
4199// // Scaled Memory Operands
4200// // Indirect Memory Times Scale Plus Offset Operand
4201// operand indScaleOffset(immP off, rRegI ireg, immI2 scale) %{
4202//   match(AddP off (LShiftI ireg scale));
4203//
4204//   op_cost(10);
4205//   format %{"[$off + $ireg << $scale]" %}
4206//   interface(MEMORY_INTER) %{
4207//     base(0x4);
4208//     index($ireg);
4209//     scale($scale);
4210//     disp($off);
4211//   %}
4212// %}
4213
4214// Indirect Memory Times Scale Plus Index Register
4215operand indIndexScale(eRegP reg, rRegI ireg, immI2 scale) %{
4216  match(AddP reg (LShiftI ireg scale));
4217
4218  op_cost(10);
4219  format %{"[$reg + $ireg << $scale]" %}
4220  interface(MEMORY_INTER) %{
4221    base($reg);
4222    index($ireg);
4223    scale($scale);
4224    disp(0x0);
4225  %}
4226%}
4227
4228// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4229operand indIndexScaleOffset(eRegP reg, immI off, rRegI ireg, immI2 scale) %{
4230  match(AddP (AddP reg (LShiftI ireg scale)) off);
4231
4232  op_cost(10);
4233  format %{"[$reg + $off + $ireg << $scale]" %}
4234  interface(MEMORY_INTER) %{
4235    base($reg);
4236    index($ireg);
4237    scale($scale);
4238    disp($off);
4239  %}
4240%}
4241
4242//----------Load Long Memory Operands------------------------------------------
4243// The load-long idiom will use it's address expression again after loading
4244// the first word of the long.  If the load-long destination overlaps with
4245// registers used in the addressing expression, the 2nd half will be loaded
4246// from a clobbered address.  Fix this by requiring that load-long use
4247// address registers that do not overlap with the load-long target.
4248
4249// load-long support
4250operand load_long_RegP() %{
4251  constraint(ALLOC_IN_RC(esi_reg));
4252  match(RegP);
4253  match(eSIRegP);
4254  op_cost(100);
4255  format %{  %}
4256  interface(REG_INTER);
4257%}
4258
4259// Indirect Memory Operand Long
4260operand load_long_indirect(load_long_RegP reg) %{
4261  constraint(ALLOC_IN_RC(esi_reg));
4262  match(reg);
4263
4264  format %{ "[$reg]" %}
4265  interface(MEMORY_INTER) %{
4266    base($reg);
4267    index(0x4);
4268    scale(0x0);
4269    disp(0x0);
4270  %}
4271%}
4272
4273// Indirect Memory Plus Long Offset Operand
4274operand load_long_indOffset32(load_long_RegP reg, immI off) %{
4275  match(AddP reg off);
4276
4277  format %{ "[$reg + $off]" %}
4278  interface(MEMORY_INTER) %{
4279    base($reg);
4280    index(0x4);
4281    scale(0x0);
4282    disp($off);
4283  %}
4284%}
4285
4286opclass load_long_memory(load_long_indirect, load_long_indOffset32);
4287
4288
4289//----------Special Memory Operands--------------------------------------------
4290// Stack Slot Operand - This operand is used for loading and storing temporary
4291//                      values on the stack where a match requires a value to
4292//                      flow through memory.
4293operand stackSlotP(sRegP reg) %{
4294  constraint(ALLOC_IN_RC(stack_slots));
4295  // No match rule because this operand is only generated in matching
4296  format %{ "[$reg]" %}
4297  interface(MEMORY_INTER) %{
4298    base(0x4);   // ESP
4299    index(0x4);  // No Index
4300    scale(0x0);  // No Scale
4301    disp($reg);  // Stack Offset
4302  %}
4303%}
4304
4305operand stackSlotI(sRegI reg) %{
4306  constraint(ALLOC_IN_RC(stack_slots));
4307  // No match rule because this operand is only generated in matching
4308  format %{ "[$reg]" %}
4309  interface(MEMORY_INTER) %{
4310    base(0x4);   // ESP
4311    index(0x4);  // No Index
4312    scale(0x0);  // No Scale
4313    disp($reg);  // Stack Offset
4314  %}
4315%}
4316
4317operand stackSlotF(sRegF reg) %{
4318  constraint(ALLOC_IN_RC(stack_slots));
4319  // No match rule because this operand is only generated in matching
4320  format %{ "[$reg]" %}
4321  interface(MEMORY_INTER) %{
4322    base(0x4);   // ESP
4323    index(0x4);  // No Index
4324    scale(0x0);  // No Scale
4325    disp($reg);  // Stack Offset
4326  %}
4327%}
4328
4329operand stackSlotD(sRegD reg) %{
4330  constraint(ALLOC_IN_RC(stack_slots));
4331  // No match rule because this operand is only generated in matching
4332  format %{ "[$reg]" %}
4333  interface(MEMORY_INTER) %{
4334    base(0x4);   // ESP
4335    index(0x4);  // No Index
4336    scale(0x0);  // No Scale
4337    disp($reg);  // Stack Offset
4338  %}
4339%}
4340
4341operand stackSlotL(sRegL reg) %{
4342  constraint(ALLOC_IN_RC(stack_slots));
4343  // No match rule because this operand is only generated in matching
4344  format %{ "[$reg]" %}
4345  interface(MEMORY_INTER) %{
4346    base(0x4);   // ESP
4347    index(0x4);  // No Index
4348    scale(0x0);  // No Scale
4349    disp($reg);  // Stack Offset
4350  %}
4351%}
4352
4353//----------Memory Operands - Win95 Implicit Null Variants----------------
4354// Indirect Memory Operand
4355operand indirect_win95_safe(eRegP_no_EBP reg)
4356%{
4357  constraint(ALLOC_IN_RC(int_reg));
4358  match(reg);
4359
4360  op_cost(100);
4361  format %{ "[$reg]" %}
4362  interface(MEMORY_INTER) %{
4363    base($reg);
4364    index(0x4);
4365    scale(0x0);
4366    disp(0x0);
4367  %}
4368%}
4369
4370// Indirect Memory Plus Short Offset Operand
4371operand indOffset8_win95_safe(eRegP_no_EBP reg, immI8 off)
4372%{
4373  match(AddP reg off);
4374
4375  op_cost(100);
4376  format %{ "[$reg + $off]" %}
4377  interface(MEMORY_INTER) %{
4378    base($reg);
4379    index(0x4);
4380    scale(0x0);
4381    disp($off);
4382  %}
4383%}
4384
4385// Indirect Memory Plus Long Offset Operand
4386operand indOffset32_win95_safe(eRegP_no_EBP reg, immI off)
4387%{
4388  match(AddP reg off);
4389
4390  op_cost(100);
4391  format %{ "[$reg + $off]" %}
4392  interface(MEMORY_INTER) %{
4393    base($reg);
4394    index(0x4);
4395    scale(0x0);
4396    disp($off);
4397  %}
4398%}
4399
4400// Indirect Memory Plus Index Register Plus Offset Operand
4401operand indIndexOffset_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI off)
4402%{
4403  match(AddP (AddP reg ireg) off);
4404
4405  op_cost(100);
4406  format %{"[$reg + $off + $ireg]" %}
4407  interface(MEMORY_INTER) %{
4408    base($reg);
4409    index($ireg);
4410    scale(0x0);
4411    disp($off);
4412  %}
4413%}
4414
4415// Indirect Memory Times Scale Plus Index Register
4416operand indIndexScale_win95_safe(eRegP_no_EBP reg, rRegI ireg, immI2 scale)
4417%{
4418  match(AddP reg (LShiftI ireg scale));
4419
4420  op_cost(100);
4421  format %{"[$reg + $ireg << $scale]" %}
4422  interface(MEMORY_INTER) %{
4423    base($reg);
4424    index($ireg);
4425    scale($scale);
4426    disp(0x0);
4427  %}
4428%}
4429
4430// Indirect Memory Times Scale Plus Index Register Plus Offset Operand
4431operand indIndexScaleOffset_win95_safe(eRegP_no_EBP reg, immI off, rRegI ireg, immI2 scale)
4432%{
4433  match(AddP (AddP reg (LShiftI ireg scale)) off);
4434
4435  op_cost(100);
4436  format %{"[$reg + $off + $ireg << $scale]" %}
4437  interface(MEMORY_INTER) %{
4438    base($reg);
4439    index($ireg);
4440    scale($scale);
4441    disp($off);
4442  %}
4443%}
4444
4445//----------Conditional Branch Operands----------------------------------------
4446// Comparison Op  - This is the operation of the comparison, and is limited to
4447//                  the following set of codes:
4448//                  L (<), LE (<=), G (>), GE (>=), E (==), NE (!=)
4449//
4450// Other attributes of the comparison, such as unsignedness, are specified
4451// by the comparison instruction that sets a condition code flags register.
4452// That result is represented by a flags operand whose subtype is appropriate
4453// to the unsignedness (etc.) of the comparison.
4454//
4455// Later, the instruction which matches both the Comparison Op (a Bool) and
4456// the flags (produced by the Cmp) specifies the coding of the comparison op
4457// by matching a specific subtype of Bool operand below, such as cmpOpU.
4458
4459// Comparision Code
4460operand cmpOp() %{
4461  match(Bool);
4462
4463  format %{ "" %}
4464  interface(COND_INTER) %{
4465    equal(0x4, "e");
4466    not_equal(0x5, "ne");
4467    less(0xC, "l");
4468    greater_equal(0xD, "ge");
4469    less_equal(0xE, "le");
4470    greater(0xF, "g");
4471    overflow(0x0, "o");
4472    no_overflow(0x1, "no");
4473  %}
4474%}
4475
4476// Comparison Code, unsigned compare.  Used by FP also, with
4477// C2 (unordered) turned into GT or LT already.  The other bits
4478// C0 and C3 are turned into Carry & Zero flags.
4479operand cmpOpU() %{
4480  match(Bool);
4481
4482  format %{ "" %}
4483  interface(COND_INTER) %{
4484    equal(0x4, "e");
4485    not_equal(0x5, "ne");
4486    less(0x2, "b");
4487    greater_equal(0x3, "nb");
4488    less_equal(0x6, "be");
4489    greater(0x7, "nbe");
4490    overflow(0x0, "o");
4491    no_overflow(0x1, "no");
4492  %}
4493%}
4494
4495// Floating comparisons that don't require any fixup for the unordered case
4496operand cmpOpUCF() %{
4497  match(Bool);
4498  predicate(n->as_Bool()->_test._test == BoolTest::lt ||
4499            n->as_Bool()->_test._test == BoolTest::ge ||
4500            n->as_Bool()->_test._test == BoolTest::le ||
4501            n->as_Bool()->_test._test == BoolTest::gt);
4502  format %{ "" %}
4503  interface(COND_INTER) %{
4504    equal(0x4, "e");
4505    not_equal(0x5, "ne");
4506    less(0x2, "b");
4507    greater_equal(0x3, "nb");
4508    less_equal(0x6, "be");
4509    greater(0x7, "nbe");
4510    overflow(0x0, "o");
4511    no_overflow(0x1, "no");
4512  %}
4513%}
4514
4515
4516// Floating comparisons that can be fixed up with extra conditional jumps
4517operand cmpOpUCF2() %{
4518  match(Bool);
4519  predicate(n->as_Bool()->_test._test == BoolTest::ne ||
4520            n->as_Bool()->_test._test == BoolTest::eq);
4521  format %{ "" %}
4522  interface(COND_INTER) %{
4523    equal(0x4, "e");
4524    not_equal(0x5, "ne");
4525    less(0x2, "b");
4526    greater_equal(0x3, "nb");
4527    less_equal(0x6, "be");
4528    greater(0x7, "nbe");
4529    overflow(0x0, "o");
4530    no_overflow(0x1, "no");
4531  %}
4532%}
4533
4534// Comparison Code for FP conditional move
4535operand cmpOp_fcmov() %{
4536  match(Bool);
4537
4538  predicate(n->as_Bool()->_test._test != BoolTest::overflow &&
4539            n->as_Bool()->_test._test != BoolTest::no_overflow);
4540  format %{ "" %}
4541  interface(COND_INTER) %{
4542    equal        (0x0C8);
4543    not_equal    (0x1C8);
4544    less         (0x0C0);
4545    greater_equal(0x1C0);
4546    less_equal   (0x0D0);
4547    greater      (0x1D0);
4548    overflow(0x0, "o"); // not really supported by the instruction
4549    no_overflow(0x1, "no"); // not really supported by the instruction
4550  %}
4551%}
4552
4553// Comparision Code used in long compares
4554operand cmpOp_commute() %{
4555  match(Bool);
4556
4557  format %{ "" %}
4558  interface(COND_INTER) %{
4559    equal(0x4, "e");
4560    not_equal(0x5, "ne");
4561    less(0xF, "g");
4562    greater_equal(0xE, "le");
4563    less_equal(0xD, "ge");
4564    greater(0xC, "l");
4565    overflow(0x0, "o");
4566    no_overflow(0x1, "no");
4567  %}
4568%}
4569
4570//----------OPERAND CLASSES----------------------------------------------------
4571// Operand Classes are groups of operands that are used as to simplify
4572// instruction definitions by not requiring the AD writer to specify separate
4573// instructions for every form of operand when the instruction accepts
4574// multiple operand types with the same basic encoding and format.  The classic
4575// case of this is memory operands.
4576
4577opclass memory(direct, indirect, indOffset8, indOffset32, indOffset32X, indIndexOffset,
4578               indIndex, indIndexScale, indIndexScaleOffset);
4579
4580// Long memory operations are encoded in 2 instructions and a +4 offset.
4581// This means some kind of offset is always required and you cannot use
4582// an oop as the offset (done when working on static globals).
4583opclass long_memory(direct, indirect, indOffset8, indOffset32, indIndexOffset,
4584                    indIndex, indIndexScale, indIndexScaleOffset);
4585
4586
4587//----------PIPELINE-----------------------------------------------------------
4588// Rules which define the behavior of the target architectures pipeline.
4589pipeline %{
4590
4591//----------ATTRIBUTES---------------------------------------------------------
4592attributes %{
4593  variable_size_instructions;        // Fixed size instructions
4594  max_instructions_per_bundle = 3;   // Up to 3 instructions per bundle
4595  instruction_unit_size = 1;         // An instruction is 1 bytes long
4596  instruction_fetch_unit_size = 16;  // The processor fetches one line
4597  instruction_fetch_units = 1;       // of 16 bytes
4598
4599  // List of nop instructions
4600  nops( MachNop );
4601%}
4602
4603//----------RESOURCES----------------------------------------------------------
4604// Resources are the functional units available to the machine
4605
4606// Generic P2/P3 pipeline
4607// 3 decoders, only D0 handles big operands; a "bundle" is the limit of
4608// 3 instructions decoded per cycle.
4609// 2 load/store ops per cycle, 1 branch, 1 FPU,
4610// 2 ALU op, only ALU0 handles mul/div instructions.
4611resources( D0, D1, D2, DECODE = D0 | D1 | D2,
4612           MS0, MS1, MEM = MS0 | MS1,
4613           BR, FPU,
4614           ALU0, ALU1, ALU = ALU0 | ALU1 );
4615
4616//----------PIPELINE DESCRIPTION-----------------------------------------------
4617// Pipeline Description specifies the stages in the machine's pipeline
4618
4619// Generic P2/P3 pipeline
4620pipe_desc(S0, S1, S2, S3, S4, S5);
4621
4622//----------PIPELINE CLASSES---------------------------------------------------
4623// Pipeline Classes describe the stages in which input and output are
4624// referenced by the hardware pipeline.
4625
4626// Naming convention: ialu or fpu
4627// Then: _reg
4628// Then: _reg if there is a 2nd register
4629// Then: _long if it's a pair of instructions implementing a long
4630// Then: _fat if it requires the big decoder
4631//   Or: _mem if it requires the big decoder and a memory unit.
4632
4633// Integer ALU reg operation
4634pipe_class ialu_reg(rRegI dst) %{
4635    single_instruction;
4636    dst    : S4(write);
4637    dst    : S3(read);
4638    DECODE : S0;        // any decoder
4639    ALU    : S3;        // any alu
4640%}
4641
4642// Long ALU reg operation
4643pipe_class ialu_reg_long(eRegL dst) %{
4644    instruction_count(2);
4645    dst    : S4(write);
4646    dst    : S3(read);
4647    DECODE : S0(2);     // any 2 decoders
4648    ALU    : S3(2);     // both alus
4649%}
4650
4651// Integer ALU reg operation using big decoder
4652pipe_class ialu_reg_fat(rRegI dst) %{
4653    single_instruction;
4654    dst    : S4(write);
4655    dst    : S3(read);
4656    D0     : S0;        // big decoder only
4657    ALU    : S3;        // any alu
4658%}
4659
4660// Long ALU reg operation using big decoder
4661pipe_class ialu_reg_long_fat(eRegL dst) %{
4662    instruction_count(2);
4663    dst    : S4(write);
4664    dst    : S3(read);
4665    D0     : S0(2);     // big decoder only; twice
4666    ALU    : S3(2);     // any 2 alus
4667%}
4668
4669// Integer ALU reg-reg operation
4670pipe_class ialu_reg_reg(rRegI dst, rRegI src) %{
4671    single_instruction;
4672    dst    : S4(write);
4673    src    : S3(read);
4674    DECODE : S0;        // any decoder
4675    ALU    : S3;        // any alu
4676%}
4677
4678// Long ALU reg-reg operation
4679pipe_class ialu_reg_reg_long(eRegL dst, eRegL src) %{
4680    instruction_count(2);
4681    dst    : S4(write);
4682    src    : S3(read);
4683    DECODE : S0(2);     // any 2 decoders
4684    ALU    : S3(2);     // both alus
4685%}
4686
4687// Integer ALU reg-reg operation
4688pipe_class ialu_reg_reg_fat(rRegI dst, memory src) %{
4689    single_instruction;
4690    dst    : S4(write);
4691    src    : S3(read);
4692    D0     : S0;        // big decoder only
4693    ALU    : S3;        // any alu
4694%}
4695
4696// Long ALU reg-reg operation
4697pipe_class ialu_reg_reg_long_fat(eRegL dst, eRegL src) %{
4698    instruction_count(2);
4699    dst    : S4(write);
4700    src    : S3(read);
4701    D0     : S0(2);     // big decoder only; twice
4702    ALU    : S3(2);     // both alus
4703%}
4704
4705// Integer ALU reg-mem operation
4706pipe_class ialu_reg_mem(rRegI dst, memory mem) %{
4707    single_instruction;
4708    dst    : S5(write);
4709    mem    : S3(read);
4710    D0     : S0;        // big decoder only
4711    ALU    : S4;        // any alu
4712    MEM    : S3;        // any mem
4713%}
4714
4715// Long ALU reg-mem operation
4716pipe_class ialu_reg_long_mem(eRegL dst, load_long_memory mem) %{
4717    instruction_count(2);
4718    dst    : S5(write);
4719    mem    : S3(read);
4720    D0     : S0(2);     // big decoder only; twice
4721    ALU    : S4(2);     // any 2 alus
4722    MEM    : S3(2);     // both mems
4723%}
4724
4725// Integer mem operation (prefetch)
4726pipe_class ialu_mem(memory mem)
4727%{
4728    single_instruction;
4729    mem    : S3(read);
4730    D0     : S0;        // big decoder only
4731    MEM    : S3;        // any mem
4732%}
4733
4734// Integer Store to Memory
4735pipe_class ialu_mem_reg(memory mem, rRegI src) %{
4736    single_instruction;
4737    mem    : S3(read);
4738    src    : S5(read);
4739    D0     : S0;        // big decoder only
4740    ALU    : S4;        // any alu
4741    MEM    : S3;
4742%}
4743
4744// Long Store to Memory
4745pipe_class ialu_mem_long_reg(memory mem, eRegL src) %{
4746    instruction_count(2);
4747    mem    : S3(read);
4748    src    : S5(read);
4749    D0     : S0(2);     // big decoder only; twice
4750    ALU    : S4(2);     // any 2 alus
4751    MEM    : S3(2);     // Both mems
4752%}
4753
4754// Integer Store to Memory
4755pipe_class ialu_mem_imm(memory mem) %{
4756    single_instruction;
4757    mem    : S3(read);
4758    D0     : S0;        // big decoder only
4759    ALU    : S4;        // any alu
4760    MEM    : S3;
4761%}
4762
4763// Integer ALU0 reg-reg operation
4764pipe_class ialu_reg_reg_alu0(rRegI dst, rRegI src) %{
4765    single_instruction;
4766    dst    : S4(write);
4767    src    : S3(read);
4768    D0     : S0;        // Big decoder only
4769    ALU0   : S3;        // only alu0
4770%}
4771
4772// Integer ALU0 reg-mem operation
4773pipe_class ialu_reg_mem_alu0(rRegI dst, memory mem) %{
4774    single_instruction;
4775    dst    : S5(write);
4776    mem    : S3(read);
4777    D0     : S0;        // big decoder only
4778    ALU0   : S4;        // ALU0 only
4779    MEM    : S3;        // any mem
4780%}
4781
4782// Integer ALU reg-reg operation
4783pipe_class ialu_cr_reg_reg(eFlagsReg cr, rRegI src1, rRegI src2) %{
4784    single_instruction;
4785    cr     : S4(write);
4786    src1   : S3(read);
4787    src2   : S3(read);
4788    DECODE : S0;        // any decoder
4789    ALU    : S3;        // any alu
4790%}
4791
4792// Integer ALU reg-imm operation
4793pipe_class ialu_cr_reg_imm(eFlagsReg cr, rRegI src1) %{
4794    single_instruction;
4795    cr     : S4(write);
4796    src1   : S3(read);
4797    DECODE : S0;        // any decoder
4798    ALU    : S3;        // any alu
4799%}
4800
4801// Integer ALU reg-mem operation
4802pipe_class ialu_cr_reg_mem(eFlagsReg cr, rRegI src1, memory src2) %{
4803    single_instruction;
4804    cr     : S4(write);
4805    src1   : S3(read);
4806    src2   : S3(read);
4807    D0     : S0;        // big decoder only
4808    ALU    : S4;        // any alu
4809    MEM    : S3;
4810%}
4811
4812// Conditional move reg-reg
4813pipe_class pipe_cmplt( rRegI p, rRegI q, rRegI y ) %{
4814    instruction_count(4);
4815    y      : S4(read);
4816    q      : S3(read);
4817    p      : S3(read);
4818    DECODE : S0(4);     // any decoder
4819%}
4820
4821// Conditional move reg-reg
4822pipe_class pipe_cmov_reg( rRegI dst, rRegI src, eFlagsReg cr ) %{
4823    single_instruction;
4824    dst    : S4(write);
4825    src    : S3(read);
4826    cr     : S3(read);
4827    DECODE : S0;        // any decoder
4828%}
4829
4830// Conditional move reg-mem
4831pipe_class pipe_cmov_mem( eFlagsReg cr, rRegI dst, memory src) %{
4832    single_instruction;
4833    dst    : S4(write);
4834    src    : S3(read);
4835    cr     : S3(read);
4836    DECODE : S0;        // any decoder
4837    MEM    : S3;
4838%}
4839
4840// Conditional move reg-reg long
4841pipe_class pipe_cmov_reg_long( eFlagsReg cr, eRegL dst, eRegL src) %{
4842    single_instruction;
4843    dst    : S4(write);
4844    src    : S3(read);
4845    cr     : S3(read);
4846    DECODE : S0(2);     // any 2 decoders
4847%}
4848
4849// Conditional move double reg-reg
4850pipe_class pipe_cmovDPR_reg( eFlagsReg cr, regDPR1 dst, regDPR src) %{
4851    single_instruction;
4852    dst    : S4(write);
4853    src    : S3(read);
4854    cr     : S3(read);
4855    DECODE : S0;        // any decoder
4856%}
4857
4858// Float reg-reg operation
4859pipe_class fpu_reg(regDPR dst) %{
4860    instruction_count(2);
4861    dst    : S3(read);
4862    DECODE : S0(2);     // any 2 decoders
4863    FPU    : S3;
4864%}
4865
4866// Float reg-reg operation
4867pipe_class fpu_reg_reg(regDPR dst, regDPR src) %{
4868    instruction_count(2);
4869    dst    : S4(write);
4870    src    : S3(read);
4871    DECODE : S0(2);     // any 2 decoders
4872    FPU    : S3;
4873%}
4874
4875// Float reg-reg operation
4876pipe_class fpu_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2) %{
4877    instruction_count(3);
4878    dst    : S4(write);
4879    src1   : S3(read);
4880    src2   : S3(read);
4881    DECODE : S0(3);     // any 3 decoders
4882    FPU    : S3(2);
4883%}
4884
4885// Float reg-reg operation
4886pipe_class fpu_reg_reg_reg_reg(regDPR dst, regDPR src1, regDPR src2, regDPR src3) %{
4887    instruction_count(4);
4888    dst    : S4(write);
4889    src1   : S3(read);
4890    src2   : S3(read);
4891    src3   : S3(read);
4892    DECODE : S0(4);     // any 3 decoders
4893    FPU    : S3(2);
4894%}
4895
4896// Float reg-reg operation
4897pipe_class fpu_reg_mem_reg_reg(regDPR dst, memory src1, regDPR src2, regDPR src3) %{
4898    instruction_count(4);
4899    dst    : S4(write);
4900    src1   : S3(read);
4901    src2   : S3(read);
4902    src3   : S3(read);
4903    DECODE : S1(3);     // any 3 decoders
4904    D0     : S0;        // Big decoder only
4905    FPU    : S3(2);
4906    MEM    : S3;
4907%}
4908
4909// Float reg-mem operation
4910pipe_class fpu_reg_mem(regDPR dst, memory mem) %{
4911    instruction_count(2);
4912    dst    : S5(write);
4913    mem    : S3(read);
4914    D0     : S0;        // big decoder only
4915    DECODE : S1;        // any decoder for FPU POP
4916    FPU    : S4;
4917    MEM    : S3;        // any mem
4918%}
4919
4920// Float reg-mem operation
4921pipe_class fpu_reg_reg_mem(regDPR dst, regDPR src1, memory mem) %{
4922    instruction_count(3);
4923    dst    : S5(write);
4924    src1   : S3(read);
4925    mem    : S3(read);
4926    D0     : S0;        // big decoder only
4927    DECODE : S1(2);     // any decoder for FPU POP
4928    FPU    : S4;
4929    MEM    : S3;        // any mem
4930%}
4931
4932// Float mem-reg operation
4933pipe_class fpu_mem_reg(memory mem, regDPR src) %{
4934    instruction_count(2);
4935    src    : S5(read);
4936    mem    : S3(read);
4937    DECODE : S0;        // any decoder for FPU PUSH
4938    D0     : S1;        // big decoder only
4939    FPU    : S4;
4940    MEM    : S3;        // any mem
4941%}
4942
4943pipe_class fpu_mem_reg_reg(memory mem, regDPR src1, regDPR src2) %{
4944    instruction_count(3);
4945    src1   : S3(read);
4946    src2   : S3(read);
4947    mem    : S3(read);
4948    DECODE : S0(2);     // any decoder for FPU PUSH
4949    D0     : S1;        // big decoder only
4950    FPU    : S4;
4951    MEM    : S3;        // any mem
4952%}
4953
4954pipe_class fpu_mem_reg_mem(memory mem, regDPR src1, memory src2) %{
4955    instruction_count(3);
4956    src1   : S3(read);
4957    src2   : S3(read);
4958    mem    : S4(read);
4959    DECODE : S0;        // any decoder for FPU PUSH
4960    D0     : S0(2);     // big decoder only
4961    FPU    : S4;
4962    MEM    : S3(2);     // any mem
4963%}
4964
4965pipe_class fpu_mem_mem(memory dst, memory src1) %{
4966    instruction_count(2);
4967    src1   : S3(read);
4968    dst    : S4(read);
4969    D0     : S0(2);     // big decoder only
4970    MEM    : S3(2);     // any mem
4971%}
4972
4973pipe_class fpu_mem_mem_mem(memory dst, memory src1, memory src2) %{
4974    instruction_count(3);
4975    src1   : S3(read);
4976    src2   : S3(read);
4977    dst    : S4(read);
4978    D0     : S0(3);     // big decoder only
4979    FPU    : S4;
4980    MEM    : S3(3);     // any mem
4981%}
4982
4983pipe_class fpu_mem_reg_con(memory mem, regDPR src1) %{
4984    instruction_count(3);
4985    src1   : S4(read);
4986    mem    : S4(read);
4987    DECODE : S0;        // any decoder for FPU PUSH
4988    D0     : S0(2);     // big decoder only
4989    FPU    : S4;
4990    MEM    : S3(2);     // any mem
4991%}
4992
4993// Float load constant
4994pipe_class fpu_reg_con(regDPR dst) %{
4995    instruction_count(2);
4996    dst    : S5(write);
4997    D0     : S0;        // big decoder only for the load
4998    DECODE : S1;        // any decoder for FPU POP
4999    FPU    : S4;
5000    MEM    : S3;        // any mem
5001%}
5002
5003// Float load constant
5004pipe_class fpu_reg_reg_con(regDPR dst, regDPR src) %{
5005    instruction_count(3);
5006    dst    : S5(write);
5007    src    : S3(read);
5008    D0     : S0;        // big decoder only for the load
5009    DECODE : S1(2);     // any decoder for FPU POP
5010    FPU    : S4;
5011    MEM    : S3;        // any mem
5012%}
5013
5014// UnConditional branch
5015pipe_class pipe_jmp( label labl ) %{
5016    single_instruction;
5017    BR   : S3;
5018%}
5019
5020// Conditional branch
5021pipe_class pipe_jcc( cmpOp cmp, eFlagsReg cr, label labl ) %{
5022    single_instruction;
5023    cr    : S1(read);
5024    BR    : S3;
5025%}
5026
5027// Allocation idiom
5028pipe_class pipe_cmpxchg( eRegP dst, eRegP heap_ptr ) %{
5029    instruction_count(1); force_serialization;
5030    fixed_latency(6);
5031    heap_ptr : S3(read);
5032    DECODE   : S0(3);
5033    D0       : S2;
5034    MEM      : S3;
5035    ALU      : S3(2);
5036    dst      : S5(write);
5037    BR       : S5;
5038%}
5039
5040// Generic big/slow expanded idiom
5041pipe_class pipe_slow(  ) %{
5042    instruction_count(10); multiple_bundles; force_serialization;
5043    fixed_latency(100);
5044    D0  : S0(2);
5045    MEM : S3(2);
5046%}
5047
5048// The real do-nothing guy
5049pipe_class empty( ) %{
5050    instruction_count(0);
5051%}
5052
5053// Define the class for the Nop node
5054define %{
5055   MachNop = empty;
5056%}
5057
5058%}
5059
5060//----------INSTRUCTIONS-------------------------------------------------------
5061//
5062// match      -- States which machine-independent subtree may be replaced
5063//               by this instruction.
5064// ins_cost   -- The estimated cost of this instruction is used by instruction
5065//               selection to identify a minimum cost tree of machine
5066//               instructions that matches a tree of machine-independent
5067//               instructions.
5068// format     -- A string providing the disassembly for this instruction.
5069//               The value of an instruction's operand may be inserted
5070//               by referring to it with a '$' prefix.
5071// opcode     -- Three instruction opcodes may be provided.  These are referred
5072//               to within an encode class as $primary, $secondary, and $tertiary
5073//               respectively.  The primary opcode is commonly used to
5074//               indicate the type of machine instruction, while secondary
5075//               and tertiary are often used for prefix options or addressing
5076//               modes.
5077// ins_encode -- A list of encode classes with parameters. The encode class
5078//               name must have been defined in an 'enc_class' specification
5079//               in the encode section of the architecture description.
5080
5081//----------BSWAP-Instruction--------------------------------------------------
5082instruct bytes_reverse_int(rRegI dst) %{
5083  match(Set dst (ReverseBytesI dst));
5084
5085  format %{ "BSWAP  $dst" %}
5086  opcode(0x0F, 0xC8);
5087  ins_encode( OpcP, OpcSReg(dst) );
5088  ins_pipe( ialu_reg );
5089%}
5090
5091instruct bytes_reverse_long(eRegL dst) %{
5092  match(Set dst (ReverseBytesL dst));
5093
5094  format %{ "BSWAP  $dst.lo\n\t"
5095            "BSWAP  $dst.hi\n\t"
5096            "XCHG   $dst.lo $dst.hi" %}
5097
5098  ins_cost(125);
5099  ins_encode( bswap_long_bytes(dst) );
5100  ins_pipe( ialu_reg_reg);
5101%}
5102
5103instruct bytes_reverse_unsigned_short(rRegI dst, eFlagsReg cr) %{
5104  match(Set dst (ReverseBytesUS dst));
5105  effect(KILL cr);
5106
5107  format %{ "BSWAP  $dst\n\t"
5108            "SHR    $dst,16\n\t" %}
5109  ins_encode %{
5110    __ bswapl($dst$$Register);
5111    __ shrl($dst$$Register, 16);
5112  %}
5113  ins_pipe( ialu_reg );
5114%}
5115
5116instruct bytes_reverse_short(rRegI dst, eFlagsReg cr) %{
5117  match(Set dst (ReverseBytesS dst));
5118  effect(KILL cr);
5119
5120  format %{ "BSWAP  $dst\n\t"
5121            "SAR    $dst,16\n\t" %}
5122  ins_encode %{
5123    __ bswapl($dst$$Register);
5124    __ sarl($dst$$Register, 16);
5125  %}
5126  ins_pipe( ialu_reg );
5127%}
5128
5129
5130//---------- Zeros Count Instructions ------------------------------------------
5131
5132instruct countLeadingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5133  predicate(UseCountLeadingZerosInstruction);
5134  match(Set dst (CountLeadingZerosI src));
5135  effect(KILL cr);
5136
5137  format %{ "LZCNT  $dst, $src\t# count leading zeros (int)" %}
5138  ins_encode %{
5139    __ lzcntl($dst$$Register, $src$$Register);
5140  %}
5141  ins_pipe(ialu_reg);
5142%}
5143
5144instruct countLeadingZerosI_bsr(rRegI dst, rRegI src, eFlagsReg cr) %{
5145  predicate(!UseCountLeadingZerosInstruction);
5146  match(Set dst (CountLeadingZerosI src));
5147  effect(KILL cr);
5148
5149  format %{ "BSR    $dst, $src\t# count leading zeros (int)\n\t"
5150            "JNZ    skip\n\t"
5151            "MOV    $dst, -1\n"
5152      "skip:\n\t"
5153            "NEG    $dst\n\t"
5154            "ADD    $dst, 31" %}
5155  ins_encode %{
5156    Register Rdst = $dst$$Register;
5157    Register Rsrc = $src$$Register;
5158    Label skip;
5159    __ bsrl(Rdst, Rsrc);
5160    __ jccb(Assembler::notZero, skip);
5161    __ movl(Rdst, -1);
5162    __ bind(skip);
5163    __ negl(Rdst);
5164    __ addl(Rdst, BitsPerInt - 1);
5165  %}
5166  ins_pipe(ialu_reg);
5167%}
5168
5169instruct countLeadingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5170  predicate(UseCountLeadingZerosInstruction);
5171  match(Set dst (CountLeadingZerosL src));
5172  effect(TEMP dst, KILL cr);
5173
5174  format %{ "LZCNT  $dst, $src.hi\t# count leading zeros (long)\n\t"
5175            "JNC    done\n\t"
5176            "LZCNT  $dst, $src.lo\n\t"
5177            "ADD    $dst, 32\n"
5178      "done:" %}
5179  ins_encode %{
5180    Register Rdst = $dst$$Register;
5181    Register Rsrc = $src$$Register;
5182    Label done;
5183    __ lzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5184    __ jccb(Assembler::carryClear, done);
5185    __ lzcntl(Rdst, Rsrc);
5186    __ addl(Rdst, BitsPerInt);
5187    __ bind(done);
5188  %}
5189  ins_pipe(ialu_reg);
5190%}
5191
5192instruct countLeadingZerosL_bsr(rRegI dst, eRegL src, eFlagsReg cr) %{
5193  predicate(!UseCountLeadingZerosInstruction);
5194  match(Set dst (CountLeadingZerosL src));
5195  effect(TEMP dst, KILL cr);
5196
5197  format %{ "BSR    $dst, $src.hi\t# count leading zeros (long)\n\t"
5198            "JZ     msw_is_zero\n\t"
5199            "ADD    $dst, 32\n\t"
5200            "JMP    not_zero\n"
5201      "msw_is_zero:\n\t"
5202            "BSR    $dst, $src.lo\n\t"
5203            "JNZ    not_zero\n\t"
5204            "MOV    $dst, -1\n"
5205      "not_zero:\n\t"
5206            "NEG    $dst\n\t"
5207            "ADD    $dst, 63\n" %}
5208 ins_encode %{
5209    Register Rdst = $dst$$Register;
5210    Register Rsrc = $src$$Register;
5211    Label msw_is_zero;
5212    Label not_zero;
5213    __ bsrl(Rdst, HIGH_FROM_LOW(Rsrc));
5214    __ jccb(Assembler::zero, msw_is_zero);
5215    __ addl(Rdst, BitsPerInt);
5216    __ jmpb(not_zero);
5217    __ bind(msw_is_zero);
5218    __ bsrl(Rdst, Rsrc);
5219    __ jccb(Assembler::notZero, not_zero);
5220    __ movl(Rdst, -1);
5221    __ bind(not_zero);
5222    __ negl(Rdst);
5223    __ addl(Rdst, BitsPerLong - 1);
5224  %}
5225  ins_pipe(ialu_reg);
5226%}
5227
5228instruct countTrailingZerosI(rRegI dst, rRegI src, eFlagsReg cr) %{
5229  predicate(UseCountTrailingZerosInstruction);
5230  match(Set dst (CountTrailingZerosI src));
5231  effect(KILL cr);
5232
5233  format %{ "TZCNT    $dst, $src\t# count trailing zeros (int)" %}
5234  ins_encode %{
5235    __ tzcntl($dst$$Register, $src$$Register);
5236  %}
5237  ins_pipe(ialu_reg);
5238%}
5239
5240instruct countTrailingZerosI_bsf(rRegI dst, rRegI src, eFlagsReg cr) %{
5241  predicate(!UseCountTrailingZerosInstruction);
5242  match(Set dst (CountTrailingZerosI src));
5243  effect(KILL cr);
5244
5245  format %{ "BSF    $dst, $src\t# count trailing zeros (int)\n\t"
5246            "JNZ    done\n\t"
5247            "MOV    $dst, 32\n"
5248      "done:" %}
5249  ins_encode %{
5250    Register Rdst = $dst$$Register;
5251    Label done;
5252    __ bsfl(Rdst, $src$$Register);
5253    __ jccb(Assembler::notZero, done);
5254    __ movl(Rdst, BitsPerInt);
5255    __ bind(done);
5256  %}
5257  ins_pipe(ialu_reg);
5258%}
5259
5260instruct countTrailingZerosL(rRegI dst, eRegL src, eFlagsReg cr) %{
5261  predicate(UseCountTrailingZerosInstruction);
5262  match(Set dst (CountTrailingZerosL src));
5263  effect(TEMP dst, KILL cr);
5264
5265  format %{ "TZCNT  $dst, $src.lo\t# count trailing zeros (long) \n\t"
5266            "JNC    done\n\t"
5267            "TZCNT  $dst, $src.hi\n\t"
5268            "ADD    $dst, 32\n"
5269            "done:" %}
5270  ins_encode %{
5271    Register Rdst = $dst$$Register;
5272    Register Rsrc = $src$$Register;
5273    Label done;
5274    __ tzcntl(Rdst, Rsrc);
5275    __ jccb(Assembler::carryClear, done);
5276    __ tzcntl(Rdst, HIGH_FROM_LOW(Rsrc));
5277    __ addl(Rdst, BitsPerInt);
5278    __ bind(done);
5279  %}
5280  ins_pipe(ialu_reg);
5281%}
5282
5283instruct countTrailingZerosL_bsf(rRegI dst, eRegL src, eFlagsReg cr) %{
5284  predicate(!UseCountTrailingZerosInstruction);
5285  match(Set dst (CountTrailingZerosL src));
5286  effect(TEMP dst, KILL cr);
5287
5288  format %{ "BSF    $dst, $src.lo\t# count trailing zeros (long)\n\t"
5289            "JNZ    done\n\t"
5290            "BSF    $dst, $src.hi\n\t"
5291            "JNZ    msw_not_zero\n\t"
5292            "MOV    $dst, 32\n"
5293      "msw_not_zero:\n\t"
5294            "ADD    $dst, 32\n"
5295      "done:" %}
5296  ins_encode %{
5297    Register Rdst = $dst$$Register;
5298    Register Rsrc = $src$$Register;
5299    Label msw_not_zero;
5300    Label done;
5301    __ bsfl(Rdst, Rsrc);
5302    __ jccb(Assembler::notZero, done);
5303    __ bsfl(Rdst, HIGH_FROM_LOW(Rsrc));
5304    __ jccb(Assembler::notZero, msw_not_zero);
5305    __ movl(Rdst, BitsPerInt);
5306    __ bind(msw_not_zero);
5307    __ addl(Rdst, BitsPerInt);
5308    __ bind(done);
5309  %}
5310  ins_pipe(ialu_reg);
5311%}
5312
5313
5314//---------- Population Count Instructions -------------------------------------
5315
5316instruct popCountI(rRegI dst, rRegI src, eFlagsReg cr) %{
5317  predicate(UsePopCountInstruction);
5318  match(Set dst (PopCountI src));
5319  effect(KILL cr);
5320
5321  format %{ "POPCNT $dst, $src" %}
5322  ins_encode %{
5323    __ popcntl($dst$$Register, $src$$Register);
5324  %}
5325  ins_pipe(ialu_reg);
5326%}
5327
5328instruct popCountI_mem(rRegI dst, memory mem, eFlagsReg cr) %{
5329  predicate(UsePopCountInstruction);
5330  match(Set dst (PopCountI (LoadI mem)));
5331  effect(KILL cr);
5332
5333  format %{ "POPCNT $dst, $mem" %}
5334  ins_encode %{
5335    __ popcntl($dst$$Register, $mem$$Address);
5336  %}
5337  ins_pipe(ialu_reg);
5338%}
5339
5340// Note: Long.bitCount(long) returns an int.
5341instruct popCountL(rRegI dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
5342  predicate(UsePopCountInstruction);
5343  match(Set dst (PopCountL src));
5344  effect(KILL cr, TEMP tmp, TEMP dst);
5345
5346  format %{ "POPCNT $dst, $src.lo\n\t"
5347            "POPCNT $tmp, $src.hi\n\t"
5348            "ADD    $dst, $tmp" %}
5349  ins_encode %{
5350    __ popcntl($dst$$Register, $src$$Register);
5351    __ popcntl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
5352    __ addl($dst$$Register, $tmp$$Register);
5353  %}
5354  ins_pipe(ialu_reg);
5355%}
5356
5357// Note: Long.bitCount(long) returns an int.
5358instruct popCountL_mem(rRegI dst, memory mem, rRegI tmp, eFlagsReg cr) %{
5359  predicate(UsePopCountInstruction);
5360  match(Set dst (PopCountL (LoadL mem)));
5361  effect(KILL cr, TEMP tmp, TEMP dst);
5362
5363  format %{ "POPCNT $dst, $mem\n\t"
5364            "POPCNT $tmp, $mem+4\n\t"
5365            "ADD    $dst, $tmp" %}
5366  ins_encode %{
5367    //__ popcntl($dst$$Register, $mem$$Address$$first);
5368    //__ popcntl($tmp$$Register, $mem$$Address$$second);
5369    __ popcntl($dst$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none));
5370    __ popcntl($tmp$$Register, Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none));
5371    __ addl($dst$$Register, $tmp$$Register);
5372  %}
5373  ins_pipe(ialu_reg);
5374%}
5375
5376
5377//----------Load/Store/Move Instructions---------------------------------------
5378//----------Load Instructions--------------------------------------------------
5379// Load Byte (8bit signed)
5380instruct loadB(xRegI dst, memory mem) %{
5381  match(Set dst (LoadB mem));
5382
5383  ins_cost(125);
5384  format %{ "MOVSX8 $dst,$mem\t# byte" %}
5385
5386  ins_encode %{
5387    __ movsbl($dst$$Register, $mem$$Address);
5388  %}
5389
5390  ins_pipe(ialu_reg_mem);
5391%}
5392
5393// Load Byte (8bit signed) into Long Register
5394instruct loadB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5395  match(Set dst (ConvI2L (LoadB mem)));
5396  effect(KILL cr);
5397
5398  ins_cost(375);
5399  format %{ "MOVSX8 $dst.lo,$mem\t# byte -> long\n\t"
5400            "MOV    $dst.hi,$dst.lo\n\t"
5401            "SAR    $dst.hi,7" %}
5402
5403  ins_encode %{
5404    __ movsbl($dst$$Register, $mem$$Address);
5405    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5406    __ sarl(HIGH_FROM_LOW($dst$$Register), 7); // 24+1 MSB are already signed extended.
5407  %}
5408
5409  ins_pipe(ialu_reg_mem);
5410%}
5411
5412// Load Unsigned Byte (8bit UNsigned)
5413instruct loadUB(xRegI dst, memory mem) %{
5414  match(Set dst (LoadUB mem));
5415
5416  ins_cost(125);
5417  format %{ "MOVZX8 $dst,$mem\t# ubyte -> int" %}
5418
5419  ins_encode %{
5420    __ movzbl($dst$$Register, $mem$$Address);
5421  %}
5422
5423  ins_pipe(ialu_reg_mem);
5424%}
5425
5426// Load Unsigned Byte (8 bit UNsigned) into Long Register
5427instruct loadUB2L(eRegL dst, memory mem, eFlagsReg cr) %{
5428  match(Set dst (ConvI2L (LoadUB mem)));
5429  effect(KILL cr);
5430
5431  ins_cost(250);
5432  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte -> long\n\t"
5433            "XOR    $dst.hi,$dst.hi" %}
5434
5435  ins_encode %{
5436    Register Rdst = $dst$$Register;
5437    __ movzbl(Rdst, $mem$$Address);
5438    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5439  %}
5440
5441  ins_pipe(ialu_reg_mem);
5442%}
5443
5444// Load Unsigned Byte (8 bit UNsigned) with mask into Long Register
5445instruct loadUB2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5446  match(Set dst (ConvI2L (AndI (LoadUB mem) mask)));
5447  effect(KILL cr);
5448
5449  format %{ "MOVZX8 $dst.lo,$mem\t# ubyte & 32-bit mask -> long\n\t"
5450            "XOR    $dst.hi,$dst.hi\n\t"
5451            "AND    $dst.lo,right_n_bits($mask, 8)" %}
5452  ins_encode %{
5453    Register Rdst = $dst$$Register;
5454    __ movzbl(Rdst, $mem$$Address);
5455    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5456    __ andl(Rdst, $mask$$constant & right_n_bits(8));
5457  %}
5458  ins_pipe(ialu_reg_mem);
5459%}
5460
5461// Load Short (16bit signed)
5462instruct loadS(rRegI dst, memory mem) %{
5463  match(Set dst (LoadS mem));
5464
5465  ins_cost(125);
5466  format %{ "MOVSX  $dst,$mem\t# short" %}
5467
5468  ins_encode %{
5469    __ movswl($dst$$Register, $mem$$Address);
5470  %}
5471
5472  ins_pipe(ialu_reg_mem);
5473%}
5474
5475// Load Short (16 bit signed) to Byte (8 bit signed)
5476instruct loadS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5477  match(Set dst (RShiftI (LShiftI (LoadS mem) twentyfour) twentyfour));
5478
5479  ins_cost(125);
5480  format %{ "MOVSX  $dst, $mem\t# short -> byte" %}
5481  ins_encode %{
5482    __ movsbl($dst$$Register, $mem$$Address);
5483  %}
5484  ins_pipe(ialu_reg_mem);
5485%}
5486
5487// Load Short (16bit signed) into Long Register
5488instruct loadS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5489  match(Set dst (ConvI2L (LoadS mem)));
5490  effect(KILL cr);
5491
5492  ins_cost(375);
5493  format %{ "MOVSX  $dst.lo,$mem\t# short -> long\n\t"
5494            "MOV    $dst.hi,$dst.lo\n\t"
5495            "SAR    $dst.hi,15" %}
5496
5497  ins_encode %{
5498    __ movswl($dst$$Register, $mem$$Address);
5499    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5500    __ sarl(HIGH_FROM_LOW($dst$$Register), 15); // 16+1 MSB are already signed extended.
5501  %}
5502
5503  ins_pipe(ialu_reg_mem);
5504%}
5505
5506// Load Unsigned Short/Char (16bit unsigned)
5507instruct loadUS(rRegI dst, memory mem) %{
5508  match(Set dst (LoadUS mem));
5509
5510  ins_cost(125);
5511  format %{ "MOVZX  $dst,$mem\t# ushort/char -> int" %}
5512
5513  ins_encode %{
5514    __ movzwl($dst$$Register, $mem$$Address);
5515  %}
5516
5517  ins_pipe(ialu_reg_mem);
5518%}
5519
5520// Load Unsigned Short/Char (16 bit UNsigned) to Byte (8 bit signed)
5521instruct loadUS2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5522  match(Set dst (RShiftI (LShiftI (LoadUS mem) twentyfour) twentyfour));
5523
5524  ins_cost(125);
5525  format %{ "MOVSX  $dst, $mem\t# ushort -> byte" %}
5526  ins_encode %{
5527    __ movsbl($dst$$Register, $mem$$Address);
5528  %}
5529  ins_pipe(ialu_reg_mem);
5530%}
5531
5532// Load Unsigned Short/Char (16 bit UNsigned) into Long Register
5533instruct loadUS2L(eRegL dst, memory mem, eFlagsReg cr) %{
5534  match(Set dst (ConvI2L (LoadUS mem)));
5535  effect(KILL cr);
5536
5537  ins_cost(250);
5538  format %{ "MOVZX  $dst.lo,$mem\t# ushort/char -> long\n\t"
5539            "XOR    $dst.hi,$dst.hi" %}
5540
5541  ins_encode %{
5542    __ movzwl($dst$$Register, $mem$$Address);
5543    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5544  %}
5545
5546  ins_pipe(ialu_reg_mem);
5547%}
5548
5549// Load Unsigned Short/Char (16 bit UNsigned) with mask 0xFF into Long Register
5550instruct loadUS2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5551  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5552  effect(KILL cr);
5553
5554  format %{ "MOVZX8 $dst.lo,$mem\t# ushort/char & 0xFF -> long\n\t"
5555            "XOR    $dst.hi,$dst.hi" %}
5556  ins_encode %{
5557    Register Rdst = $dst$$Register;
5558    __ movzbl(Rdst, $mem$$Address);
5559    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5560  %}
5561  ins_pipe(ialu_reg_mem);
5562%}
5563
5564// Load Unsigned Short/Char (16 bit UNsigned) with a 32-bit mask into Long Register
5565instruct loadUS2L_immI(eRegL dst, memory mem, immI mask, eFlagsReg cr) %{
5566  match(Set dst (ConvI2L (AndI (LoadUS mem) mask)));
5567  effect(KILL cr);
5568
5569  format %{ "MOVZX  $dst.lo, $mem\t# ushort/char & 32-bit mask -> long\n\t"
5570            "XOR    $dst.hi,$dst.hi\n\t"
5571            "AND    $dst.lo,right_n_bits($mask, 16)" %}
5572  ins_encode %{
5573    Register Rdst = $dst$$Register;
5574    __ movzwl(Rdst, $mem$$Address);
5575    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5576    __ andl(Rdst, $mask$$constant & right_n_bits(16));
5577  %}
5578  ins_pipe(ialu_reg_mem);
5579%}
5580
5581// Load Integer
5582instruct loadI(rRegI dst, memory mem) %{
5583  match(Set dst (LoadI mem));
5584
5585  ins_cost(125);
5586  format %{ "MOV    $dst,$mem\t# int" %}
5587
5588  ins_encode %{
5589    __ movl($dst$$Register, $mem$$Address);
5590  %}
5591
5592  ins_pipe(ialu_reg_mem);
5593%}
5594
5595// Load Integer (32 bit signed) to Byte (8 bit signed)
5596instruct loadI2B(rRegI dst, memory mem, immI_24 twentyfour) %{
5597  match(Set dst (RShiftI (LShiftI (LoadI mem) twentyfour) twentyfour));
5598
5599  ins_cost(125);
5600  format %{ "MOVSX  $dst, $mem\t# int -> byte" %}
5601  ins_encode %{
5602    __ movsbl($dst$$Register, $mem$$Address);
5603  %}
5604  ins_pipe(ialu_reg_mem);
5605%}
5606
5607// Load Integer (32 bit signed) to Unsigned Byte (8 bit UNsigned)
5608instruct loadI2UB(rRegI dst, memory mem, immI_255 mask) %{
5609  match(Set dst (AndI (LoadI mem) mask));
5610
5611  ins_cost(125);
5612  format %{ "MOVZX  $dst, $mem\t# int -> ubyte" %}
5613  ins_encode %{
5614    __ movzbl($dst$$Register, $mem$$Address);
5615  %}
5616  ins_pipe(ialu_reg_mem);
5617%}
5618
5619// Load Integer (32 bit signed) to Short (16 bit signed)
5620instruct loadI2S(rRegI dst, memory mem, immI_16 sixteen) %{
5621  match(Set dst (RShiftI (LShiftI (LoadI mem) sixteen) sixteen));
5622
5623  ins_cost(125);
5624  format %{ "MOVSX  $dst, $mem\t# int -> short" %}
5625  ins_encode %{
5626    __ movswl($dst$$Register, $mem$$Address);
5627  %}
5628  ins_pipe(ialu_reg_mem);
5629%}
5630
5631// Load Integer (32 bit signed) to Unsigned Short/Char (16 bit UNsigned)
5632instruct loadI2US(rRegI dst, memory mem, immI_65535 mask) %{
5633  match(Set dst (AndI (LoadI mem) mask));
5634
5635  ins_cost(125);
5636  format %{ "MOVZX  $dst, $mem\t# int -> ushort/char" %}
5637  ins_encode %{
5638    __ movzwl($dst$$Register, $mem$$Address);
5639  %}
5640  ins_pipe(ialu_reg_mem);
5641%}
5642
5643// Load Integer into Long Register
5644instruct loadI2L(eRegL dst, memory mem, eFlagsReg cr) %{
5645  match(Set dst (ConvI2L (LoadI mem)));
5646  effect(KILL cr);
5647
5648  ins_cost(375);
5649  format %{ "MOV    $dst.lo,$mem\t# int -> long\n\t"
5650            "MOV    $dst.hi,$dst.lo\n\t"
5651            "SAR    $dst.hi,31" %}
5652
5653  ins_encode %{
5654    __ movl($dst$$Register, $mem$$Address);
5655    __ movl(HIGH_FROM_LOW($dst$$Register), $dst$$Register); // This is always a different register.
5656    __ sarl(HIGH_FROM_LOW($dst$$Register), 31);
5657  %}
5658
5659  ins_pipe(ialu_reg_mem);
5660%}
5661
5662// Load Integer with mask 0xFF into Long Register
5663instruct loadI2L_immI_255(eRegL dst, memory mem, immI_255 mask, eFlagsReg cr) %{
5664  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5665  effect(KILL cr);
5666
5667  format %{ "MOVZX8 $dst.lo,$mem\t# int & 0xFF -> long\n\t"
5668            "XOR    $dst.hi,$dst.hi" %}
5669  ins_encode %{
5670    Register Rdst = $dst$$Register;
5671    __ movzbl(Rdst, $mem$$Address);
5672    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5673  %}
5674  ins_pipe(ialu_reg_mem);
5675%}
5676
5677// Load Integer with mask 0xFFFF into Long Register
5678instruct loadI2L_immI_65535(eRegL dst, memory mem, immI_65535 mask, eFlagsReg cr) %{
5679  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5680  effect(KILL cr);
5681
5682  format %{ "MOVZX  $dst.lo,$mem\t# int & 0xFFFF -> long\n\t"
5683            "XOR    $dst.hi,$dst.hi" %}
5684  ins_encode %{
5685    Register Rdst = $dst$$Register;
5686    __ movzwl(Rdst, $mem$$Address);
5687    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5688  %}
5689  ins_pipe(ialu_reg_mem);
5690%}
5691
5692// Load Integer with 31-bit mask into Long Register
5693instruct loadI2L_immU31(eRegL dst, memory mem, immU31 mask, eFlagsReg cr) %{
5694  match(Set dst (ConvI2L (AndI (LoadI mem) mask)));
5695  effect(KILL cr);
5696
5697  format %{ "MOV    $dst.lo,$mem\t# int & 31-bit mask -> long\n\t"
5698            "XOR    $dst.hi,$dst.hi\n\t"
5699            "AND    $dst.lo,$mask" %}
5700  ins_encode %{
5701    Register Rdst = $dst$$Register;
5702    __ movl(Rdst, $mem$$Address);
5703    __ xorl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rdst));
5704    __ andl(Rdst, $mask$$constant);
5705  %}
5706  ins_pipe(ialu_reg_mem);
5707%}
5708
5709// Load Unsigned Integer into Long Register
5710instruct loadUI2L(eRegL dst, memory mem, immL_32bits mask, eFlagsReg cr) %{
5711  match(Set dst (AndL (ConvI2L (LoadI mem)) mask));
5712  effect(KILL cr);
5713
5714  ins_cost(250);
5715  format %{ "MOV    $dst.lo,$mem\t# uint -> long\n\t"
5716            "XOR    $dst.hi,$dst.hi" %}
5717
5718  ins_encode %{
5719    __ movl($dst$$Register, $mem$$Address);
5720    __ xorl(HIGH_FROM_LOW($dst$$Register), HIGH_FROM_LOW($dst$$Register));
5721  %}
5722
5723  ins_pipe(ialu_reg_mem);
5724%}
5725
5726// Load Long.  Cannot clobber address while loading, so restrict address
5727// register to ESI
5728instruct loadL(eRegL dst, load_long_memory mem) %{
5729  predicate(!((LoadLNode*)n)->require_atomic_access());
5730  match(Set dst (LoadL mem));
5731
5732  ins_cost(250);
5733  format %{ "MOV    $dst.lo,$mem\t# long\n\t"
5734            "MOV    $dst.hi,$mem+4" %}
5735
5736  ins_encode %{
5737    Address Amemlo = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp, relocInfo::none);
5738    Address Amemhi = Address::make_raw($mem$$base, $mem$$index, $mem$$scale, $mem$$disp + 4, relocInfo::none);
5739    __ movl($dst$$Register, Amemlo);
5740    __ movl(HIGH_FROM_LOW($dst$$Register), Amemhi);
5741  %}
5742
5743  ins_pipe(ialu_reg_long_mem);
5744%}
5745
5746// Volatile Load Long.  Must be atomic, so do 64-bit FILD
5747// then store it down to the stack and reload on the int
5748// side.
5749instruct loadL_volatile(stackSlotL dst, memory mem) %{
5750  predicate(UseSSE<=1 && ((LoadLNode*)n)->require_atomic_access());
5751  match(Set dst (LoadL mem));
5752
5753  ins_cost(200);
5754  format %{ "FILD   $mem\t# Atomic volatile long load\n\t"
5755            "FISTp  $dst" %}
5756  ins_encode(enc_loadL_volatile(mem,dst));
5757  ins_pipe( fpu_reg_mem );
5758%}
5759
5760instruct loadLX_volatile(stackSlotL dst, memory mem, regD tmp) %{
5761  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5762  match(Set dst (LoadL mem));
5763  effect(TEMP tmp);
5764  ins_cost(180);
5765  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5766            "MOVSD  $dst,$tmp" %}
5767  ins_encode %{
5768    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5769    __ movdbl(Address(rsp, $dst$$disp), $tmp$$XMMRegister);
5770  %}
5771  ins_pipe( pipe_slow );
5772%}
5773
5774instruct loadLX_reg_volatile(eRegL dst, memory mem, regD tmp) %{
5775  predicate(UseSSE>=2 && ((LoadLNode*)n)->require_atomic_access());
5776  match(Set dst (LoadL mem));
5777  effect(TEMP tmp);
5778  ins_cost(160);
5779  format %{ "MOVSD  $tmp,$mem\t# Atomic volatile long load\n\t"
5780            "MOVD   $dst.lo,$tmp\n\t"
5781            "PSRLQ  $tmp,32\n\t"
5782            "MOVD   $dst.hi,$tmp" %}
5783  ins_encode %{
5784    __ movdbl($tmp$$XMMRegister, $mem$$Address);
5785    __ movdl($dst$$Register, $tmp$$XMMRegister);
5786    __ psrlq($tmp$$XMMRegister, 32);
5787    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
5788  %}
5789  ins_pipe( pipe_slow );
5790%}
5791
5792// Load Range
5793instruct loadRange(rRegI dst, memory mem) %{
5794  match(Set dst (LoadRange mem));
5795
5796  ins_cost(125);
5797  format %{ "MOV    $dst,$mem" %}
5798  opcode(0x8B);
5799  ins_encode( OpcP, RegMem(dst,mem));
5800  ins_pipe( ialu_reg_mem );
5801%}
5802
5803
5804// Load Pointer
5805instruct loadP(eRegP dst, memory mem) %{
5806  match(Set dst (LoadP mem));
5807
5808  ins_cost(125);
5809  format %{ "MOV    $dst,$mem" %}
5810  opcode(0x8B);
5811  ins_encode( OpcP, RegMem(dst,mem));
5812  ins_pipe( ialu_reg_mem );
5813%}
5814
5815// Load Klass Pointer
5816instruct loadKlass(eRegP dst, memory mem) %{
5817  match(Set dst (LoadKlass mem));
5818
5819  ins_cost(125);
5820  format %{ "MOV    $dst,$mem" %}
5821  opcode(0x8B);
5822  ins_encode( OpcP, RegMem(dst,mem));
5823  ins_pipe( ialu_reg_mem );
5824%}
5825
5826// Load Double
5827instruct loadDPR(regDPR dst, memory mem) %{
5828  predicate(UseSSE<=1);
5829  match(Set dst (LoadD mem));
5830
5831  ins_cost(150);
5832  format %{ "FLD_D  ST,$mem\n\t"
5833            "FSTP   $dst" %}
5834  opcode(0xDD);               /* DD /0 */
5835  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5836              Pop_Reg_DPR(dst) );
5837  ins_pipe( fpu_reg_mem );
5838%}
5839
5840// Load Double to XMM
5841instruct loadD(regD dst, memory mem) %{
5842  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
5843  match(Set dst (LoadD mem));
5844  ins_cost(145);
5845  format %{ "MOVSD  $dst,$mem" %}
5846  ins_encode %{
5847    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5848  %}
5849  ins_pipe( pipe_slow );
5850%}
5851
5852instruct loadD_partial(regD dst, memory mem) %{
5853  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
5854  match(Set dst (LoadD mem));
5855  ins_cost(145);
5856  format %{ "MOVLPD $dst,$mem" %}
5857  ins_encode %{
5858    __ movdbl ($dst$$XMMRegister, $mem$$Address);
5859  %}
5860  ins_pipe( pipe_slow );
5861%}
5862
5863// Load to XMM register (single-precision floating point)
5864// MOVSS instruction
5865instruct loadF(regF dst, memory mem) %{
5866  predicate(UseSSE>=1);
5867  match(Set dst (LoadF mem));
5868  ins_cost(145);
5869  format %{ "MOVSS  $dst,$mem" %}
5870  ins_encode %{
5871    __ movflt ($dst$$XMMRegister, $mem$$Address);
5872  %}
5873  ins_pipe( pipe_slow );
5874%}
5875
5876// Load Float
5877instruct loadFPR(regFPR dst, memory mem) %{
5878  predicate(UseSSE==0);
5879  match(Set dst (LoadF mem));
5880
5881  ins_cost(150);
5882  format %{ "FLD_S  ST,$mem\n\t"
5883            "FSTP   $dst" %}
5884  opcode(0xD9);               /* D9 /0 */
5885  ins_encode( OpcP, RMopc_Mem(0x00,mem),
5886              Pop_Reg_FPR(dst) );
5887  ins_pipe( fpu_reg_mem );
5888%}
5889
5890// Load Effective Address
5891instruct leaP8(eRegP dst, indOffset8 mem) %{
5892  match(Set dst mem);
5893
5894  ins_cost(110);
5895  format %{ "LEA    $dst,$mem" %}
5896  opcode(0x8D);
5897  ins_encode( OpcP, RegMem(dst,mem));
5898  ins_pipe( ialu_reg_reg_fat );
5899%}
5900
5901instruct leaP32(eRegP dst, indOffset32 mem) %{
5902  match(Set dst mem);
5903
5904  ins_cost(110);
5905  format %{ "LEA    $dst,$mem" %}
5906  opcode(0x8D);
5907  ins_encode( OpcP, RegMem(dst,mem));
5908  ins_pipe( ialu_reg_reg_fat );
5909%}
5910
5911instruct leaPIdxOff(eRegP dst, indIndexOffset mem) %{
5912  match(Set dst mem);
5913
5914  ins_cost(110);
5915  format %{ "LEA    $dst,$mem" %}
5916  opcode(0x8D);
5917  ins_encode( OpcP, RegMem(dst,mem));
5918  ins_pipe( ialu_reg_reg_fat );
5919%}
5920
5921instruct leaPIdxScale(eRegP dst, indIndexScale mem) %{
5922  match(Set dst mem);
5923
5924  ins_cost(110);
5925  format %{ "LEA    $dst,$mem" %}
5926  opcode(0x8D);
5927  ins_encode( OpcP, RegMem(dst,mem));
5928  ins_pipe( ialu_reg_reg_fat );
5929%}
5930
5931instruct leaPIdxScaleOff(eRegP dst, indIndexScaleOffset mem) %{
5932  match(Set dst mem);
5933
5934  ins_cost(110);
5935  format %{ "LEA    $dst,$mem" %}
5936  opcode(0x8D);
5937  ins_encode( OpcP, RegMem(dst,mem));
5938  ins_pipe( ialu_reg_reg_fat );
5939%}
5940
5941// Load Constant
5942instruct loadConI(rRegI dst, immI src) %{
5943  match(Set dst src);
5944
5945  format %{ "MOV    $dst,$src" %}
5946  ins_encode( LdImmI(dst, src) );
5947  ins_pipe( ialu_reg_fat );
5948%}
5949
5950// Load Constant zero
5951instruct loadConI0(rRegI dst, immI0 src, eFlagsReg cr) %{
5952  match(Set dst src);
5953  effect(KILL cr);
5954
5955  ins_cost(50);
5956  format %{ "XOR    $dst,$dst" %}
5957  opcode(0x33);  /* + rd */
5958  ins_encode( OpcP, RegReg( dst, dst ) );
5959  ins_pipe( ialu_reg );
5960%}
5961
5962instruct loadConP(eRegP dst, immP src) %{
5963  match(Set dst src);
5964
5965  format %{ "MOV    $dst,$src" %}
5966  opcode(0xB8);  /* + rd */
5967  ins_encode( LdImmP(dst, src) );
5968  ins_pipe( ialu_reg_fat );
5969%}
5970
5971instruct loadConL(eRegL dst, immL src, eFlagsReg cr) %{
5972  match(Set dst src);
5973  effect(KILL cr);
5974  ins_cost(200);
5975  format %{ "MOV    $dst.lo,$src.lo\n\t"
5976            "MOV    $dst.hi,$src.hi" %}
5977  opcode(0xB8);
5978  ins_encode( LdImmL_Lo(dst, src), LdImmL_Hi(dst, src) );
5979  ins_pipe( ialu_reg_long_fat );
5980%}
5981
5982instruct loadConL0(eRegL dst, immL0 src, eFlagsReg cr) %{
5983  match(Set dst src);
5984  effect(KILL cr);
5985  ins_cost(150);
5986  format %{ "XOR    $dst.lo,$dst.lo\n\t"
5987            "XOR    $dst.hi,$dst.hi" %}
5988  opcode(0x33,0x33);
5989  ins_encode( RegReg_Lo(dst,dst), RegReg_Hi(dst, dst) );
5990  ins_pipe( ialu_reg_long );
5991%}
5992
5993// The instruction usage is guarded by predicate in operand immFPR().
5994instruct loadConFPR(regFPR dst, immFPR con) %{
5995  match(Set dst con);
5996  ins_cost(125);
5997  format %{ "FLD_S  ST,[$constantaddress]\t# load from constant table: float=$con\n\t"
5998            "FSTP   $dst" %}
5999  ins_encode %{
6000    __ fld_s($constantaddress($con));
6001    __ fstp_d($dst$$reg);
6002  %}
6003  ins_pipe(fpu_reg_con);
6004%}
6005
6006// The instruction usage is guarded by predicate in operand immFPR0().
6007instruct loadConFPR0(regFPR dst, immFPR0 con) %{
6008  match(Set dst con);
6009  ins_cost(125);
6010  format %{ "FLDZ   ST\n\t"
6011            "FSTP   $dst" %}
6012  ins_encode %{
6013    __ fldz();
6014    __ fstp_d($dst$$reg);
6015  %}
6016  ins_pipe(fpu_reg_con);
6017%}
6018
6019// The instruction usage is guarded by predicate in operand immFPR1().
6020instruct loadConFPR1(regFPR dst, immFPR1 con) %{
6021  match(Set dst con);
6022  ins_cost(125);
6023  format %{ "FLD1   ST\n\t"
6024            "FSTP   $dst" %}
6025  ins_encode %{
6026    __ fld1();
6027    __ fstp_d($dst$$reg);
6028  %}
6029  ins_pipe(fpu_reg_con);
6030%}
6031
6032// The instruction usage is guarded by predicate in operand immF().
6033instruct loadConF(regF dst, immF con) %{
6034  match(Set dst con);
6035  ins_cost(125);
6036  format %{ "MOVSS  $dst,[$constantaddress]\t# load from constant table: float=$con" %}
6037  ins_encode %{
6038    __ movflt($dst$$XMMRegister, $constantaddress($con));
6039  %}
6040  ins_pipe(pipe_slow);
6041%}
6042
6043// The instruction usage is guarded by predicate in operand immF0().
6044instruct loadConF0(regF dst, immF0 src) %{
6045  match(Set dst src);
6046  ins_cost(100);
6047  format %{ "XORPS  $dst,$dst\t# float 0.0" %}
6048  ins_encode %{
6049    __ xorps($dst$$XMMRegister, $dst$$XMMRegister);
6050  %}
6051  ins_pipe(pipe_slow);
6052%}
6053
6054// The instruction usage is guarded by predicate in operand immDPR().
6055instruct loadConDPR(regDPR dst, immDPR con) %{
6056  match(Set dst con);
6057  ins_cost(125);
6058
6059  format %{ "FLD_D  ST,[$constantaddress]\t# load from constant table: double=$con\n\t"
6060            "FSTP   $dst" %}
6061  ins_encode %{
6062    __ fld_d($constantaddress($con));
6063    __ fstp_d($dst$$reg);
6064  %}
6065  ins_pipe(fpu_reg_con);
6066%}
6067
6068// The instruction usage is guarded by predicate in operand immDPR0().
6069instruct loadConDPR0(regDPR dst, immDPR0 con) %{
6070  match(Set dst con);
6071  ins_cost(125);
6072
6073  format %{ "FLDZ   ST\n\t"
6074            "FSTP   $dst" %}
6075  ins_encode %{
6076    __ fldz();
6077    __ fstp_d($dst$$reg);
6078  %}
6079  ins_pipe(fpu_reg_con);
6080%}
6081
6082// The instruction usage is guarded by predicate in operand immDPR1().
6083instruct loadConDPR1(regDPR dst, immDPR1 con) %{
6084  match(Set dst con);
6085  ins_cost(125);
6086
6087  format %{ "FLD1   ST\n\t"
6088            "FSTP   $dst" %}
6089  ins_encode %{
6090    __ fld1();
6091    __ fstp_d($dst$$reg);
6092  %}
6093  ins_pipe(fpu_reg_con);
6094%}
6095
6096// The instruction usage is guarded by predicate in operand immD().
6097instruct loadConD(regD dst, immD con) %{
6098  match(Set dst con);
6099  ins_cost(125);
6100  format %{ "MOVSD  $dst,[$constantaddress]\t# load from constant table: double=$con" %}
6101  ins_encode %{
6102    __ movdbl($dst$$XMMRegister, $constantaddress($con));
6103  %}
6104  ins_pipe(pipe_slow);
6105%}
6106
6107// The instruction usage is guarded by predicate in operand immD0().
6108instruct loadConD0(regD dst, immD0 src) %{
6109  match(Set dst src);
6110  ins_cost(100);
6111  format %{ "XORPD  $dst,$dst\t# double 0.0" %}
6112  ins_encode %{
6113    __ xorpd ($dst$$XMMRegister, $dst$$XMMRegister);
6114  %}
6115  ins_pipe( pipe_slow );
6116%}
6117
6118// Load Stack Slot
6119instruct loadSSI(rRegI dst, stackSlotI src) %{
6120  match(Set dst src);
6121  ins_cost(125);
6122
6123  format %{ "MOV    $dst,$src" %}
6124  opcode(0x8B);
6125  ins_encode( OpcP, RegMem(dst,src));
6126  ins_pipe( ialu_reg_mem );
6127%}
6128
6129instruct loadSSL(eRegL dst, stackSlotL src) %{
6130  match(Set dst src);
6131
6132  ins_cost(200);
6133  format %{ "MOV    $dst,$src.lo\n\t"
6134            "MOV    $dst+4,$src.hi" %}
6135  opcode(0x8B, 0x8B);
6136  ins_encode( OpcP, RegMem( dst, src ), OpcS, RegMem_Hi( dst, src ) );
6137  ins_pipe( ialu_mem_long_reg );
6138%}
6139
6140// Load Stack Slot
6141instruct loadSSP(eRegP dst, stackSlotP src) %{
6142  match(Set dst src);
6143  ins_cost(125);
6144
6145  format %{ "MOV    $dst,$src" %}
6146  opcode(0x8B);
6147  ins_encode( OpcP, RegMem(dst,src));
6148  ins_pipe( ialu_reg_mem );
6149%}
6150
6151// Load Stack Slot
6152instruct loadSSF(regFPR dst, stackSlotF src) %{
6153  match(Set dst src);
6154  ins_cost(125);
6155
6156  format %{ "FLD_S  $src\n\t"
6157            "FSTP   $dst" %}
6158  opcode(0xD9);               /* D9 /0, FLD m32real */
6159  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6160              Pop_Reg_FPR(dst) );
6161  ins_pipe( fpu_reg_mem );
6162%}
6163
6164// Load Stack Slot
6165instruct loadSSD(regDPR dst, stackSlotD src) %{
6166  match(Set dst src);
6167  ins_cost(125);
6168
6169  format %{ "FLD_D  $src\n\t"
6170            "FSTP   $dst" %}
6171  opcode(0xDD);               /* DD /0, FLD m64real */
6172  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
6173              Pop_Reg_DPR(dst) );
6174  ins_pipe( fpu_reg_mem );
6175%}
6176
6177// Prefetch instructions for allocation.
6178// Must be safe to execute with invalid address (cannot fault).
6179
6180instruct prefetchAlloc0( memory mem ) %{
6181  predicate(UseSSE==0 && AllocatePrefetchInstr!=3);
6182  match(PrefetchAllocation mem);
6183  ins_cost(0);
6184  size(0);
6185  format %{ "Prefetch allocation (non-SSE is empty encoding)" %}
6186  ins_encode();
6187  ins_pipe(empty);
6188%}
6189
6190instruct prefetchAlloc( memory mem ) %{
6191  predicate(AllocatePrefetchInstr==3);
6192  match( PrefetchAllocation mem );
6193  ins_cost(100);
6194
6195  format %{ "PREFETCHW $mem\t! Prefetch allocation into L1 cache and mark modified" %}
6196  ins_encode %{
6197    __ prefetchw($mem$$Address);
6198  %}
6199  ins_pipe(ialu_mem);
6200%}
6201
6202instruct prefetchAllocNTA( memory mem ) %{
6203  predicate(UseSSE>=1 && AllocatePrefetchInstr==0);
6204  match(PrefetchAllocation mem);
6205  ins_cost(100);
6206
6207  format %{ "PREFETCHNTA $mem\t! Prefetch allocation into non-temporal cache for write" %}
6208  ins_encode %{
6209    __ prefetchnta($mem$$Address);
6210  %}
6211  ins_pipe(ialu_mem);
6212%}
6213
6214instruct prefetchAllocT0( memory mem ) %{
6215  predicate(UseSSE>=1 && AllocatePrefetchInstr==1);
6216  match(PrefetchAllocation mem);
6217  ins_cost(100);
6218
6219  format %{ "PREFETCHT0 $mem\t! Prefetch allocation into L1 and L2 caches for write" %}
6220  ins_encode %{
6221    __ prefetcht0($mem$$Address);
6222  %}
6223  ins_pipe(ialu_mem);
6224%}
6225
6226instruct prefetchAllocT2( memory mem ) %{
6227  predicate(UseSSE>=1 && AllocatePrefetchInstr==2);
6228  match(PrefetchAllocation mem);
6229  ins_cost(100);
6230
6231  format %{ "PREFETCHT2 $mem\t! Prefetch allocation into L2 cache for write" %}
6232  ins_encode %{
6233    __ prefetcht2($mem$$Address);
6234  %}
6235  ins_pipe(ialu_mem);
6236%}
6237
6238//----------Store Instructions-------------------------------------------------
6239
6240// Store Byte
6241instruct storeB(memory mem, xRegI src) %{
6242  match(Set mem (StoreB mem src));
6243
6244  ins_cost(125);
6245  format %{ "MOV8   $mem,$src" %}
6246  opcode(0x88);
6247  ins_encode( OpcP, RegMem( src, mem ) );
6248  ins_pipe( ialu_mem_reg );
6249%}
6250
6251// Store Char/Short
6252instruct storeC(memory mem, rRegI src) %{
6253  match(Set mem (StoreC mem src));
6254
6255  ins_cost(125);
6256  format %{ "MOV16  $mem,$src" %}
6257  opcode(0x89, 0x66);
6258  ins_encode( OpcS, OpcP, RegMem( src, mem ) );
6259  ins_pipe( ialu_mem_reg );
6260%}
6261
6262// Store Integer
6263instruct storeI(memory mem, rRegI src) %{
6264  match(Set mem (StoreI mem src));
6265
6266  ins_cost(125);
6267  format %{ "MOV    $mem,$src" %}
6268  opcode(0x89);
6269  ins_encode( OpcP, RegMem( src, mem ) );
6270  ins_pipe( ialu_mem_reg );
6271%}
6272
6273// Store Long
6274instruct storeL(long_memory mem, eRegL src) %{
6275  predicate(!((StoreLNode*)n)->require_atomic_access());
6276  match(Set mem (StoreL mem src));
6277
6278  ins_cost(200);
6279  format %{ "MOV    $mem,$src.lo\n\t"
6280            "MOV    $mem+4,$src.hi" %}
6281  opcode(0x89, 0x89);
6282  ins_encode( OpcP, RegMem( src, mem ), OpcS, RegMem_Hi( src, mem ) );
6283  ins_pipe( ialu_mem_long_reg );
6284%}
6285
6286// Store Long to Integer
6287instruct storeL2I(memory mem, eRegL src) %{
6288  match(Set mem (StoreI mem (ConvL2I src)));
6289
6290  format %{ "MOV    $mem,$src.lo\t# long -> int" %}
6291  ins_encode %{
6292    __ movl($mem$$Address, $src$$Register);
6293  %}
6294  ins_pipe(ialu_mem_reg);
6295%}
6296
6297// Volatile Store Long.  Must be atomic, so move it into
6298// the FP TOS and then do a 64-bit FIST.  Has to probe the
6299// target address before the store (for null-ptr checks)
6300// so the memory operand is used twice in the encoding.
6301instruct storeL_volatile(memory mem, stackSlotL src, eFlagsReg cr ) %{
6302  predicate(UseSSE<=1 && ((StoreLNode*)n)->require_atomic_access());
6303  match(Set mem (StoreL mem src));
6304  effect( KILL cr );
6305  ins_cost(400);
6306  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6307            "FILD   $src\n\t"
6308            "FISTp  $mem\t # 64-bit atomic volatile long store" %}
6309  opcode(0x3B);
6310  ins_encode( OpcP, RegMem( EAX, mem ), enc_storeL_volatile(mem,src));
6311  ins_pipe( fpu_reg_mem );
6312%}
6313
6314instruct storeLX_volatile(memory mem, stackSlotL src, regD tmp, eFlagsReg cr) %{
6315  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6316  match(Set mem (StoreL mem src));
6317  effect( TEMP tmp, KILL cr );
6318  ins_cost(380);
6319  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6320            "MOVSD  $tmp,$src\n\t"
6321            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6322  ins_encode %{
6323    __ cmpl(rax, $mem$$Address);
6324    __ movdbl($tmp$$XMMRegister, Address(rsp, $src$$disp));
6325    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6326  %}
6327  ins_pipe( pipe_slow );
6328%}
6329
6330instruct storeLX_reg_volatile(memory mem, eRegL src, regD tmp2, regD tmp, eFlagsReg cr) %{
6331  predicate(UseSSE>=2 && ((StoreLNode*)n)->require_atomic_access());
6332  match(Set mem (StoreL mem src));
6333  effect( TEMP tmp2 , TEMP tmp, KILL cr );
6334  ins_cost(360);
6335  format %{ "CMP    $mem,EAX\t# Probe address for implicit null check\n\t"
6336            "MOVD   $tmp,$src.lo\n\t"
6337            "MOVD   $tmp2,$src.hi\n\t"
6338            "PUNPCKLDQ $tmp,$tmp2\n\t"
6339            "MOVSD  $mem,$tmp\t # 64-bit atomic volatile long store" %}
6340  ins_encode %{
6341    __ cmpl(rax, $mem$$Address);
6342    __ movdl($tmp$$XMMRegister, $src$$Register);
6343    __ movdl($tmp2$$XMMRegister, HIGH_FROM_LOW($src$$Register));
6344    __ punpckldq($tmp$$XMMRegister, $tmp2$$XMMRegister);
6345    __ movdbl($mem$$Address, $tmp$$XMMRegister);
6346  %}
6347  ins_pipe( pipe_slow );
6348%}
6349
6350// Store Pointer; for storing unknown oops and raw pointers
6351instruct storeP(memory mem, anyRegP src) %{
6352  match(Set mem (StoreP mem src));
6353
6354  ins_cost(125);
6355  format %{ "MOV    $mem,$src" %}
6356  opcode(0x89);
6357  ins_encode( OpcP, RegMem( src, mem ) );
6358  ins_pipe( ialu_mem_reg );
6359%}
6360
6361// Store Integer Immediate
6362instruct storeImmI(memory mem, immI src) %{
6363  match(Set mem (StoreI mem src));
6364
6365  ins_cost(150);
6366  format %{ "MOV    $mem,$src" %}
6367  opcode(0xC7);               /* C7 /0 */
6368  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6369  ins_pipe( ialu_mem_imm );
6370%}
6371
6372// Store Short/Char Immediate
6373instruct storeImmI16(memory mem, immI16 src) %{
6374  predicate(UseStoreImmI16);
6375  match(Set mem (StoreC mem src));
6376
6377  ins_cost(150);
6378  format %{ "MOV16  $mem,$src" %}
6379  opcode(0xC7);     /* C7 /0 Same as 32 store immediate with prefix */
6380  ins_encode( SizePrefix, OpcP, RMopc_Mem(0x00,mem),  Con16( src ));
6381  ins_pipe( ialu_mem_imm );
6382%}
6383
6384// Store Pointer Immediate; null pointers or constant oops that do not
6385// need card-mark barriers.
6386instruct storeImmP(memory mem, immP src) %{
6387  match(Set mem (StoreP mem src));
6388
6389  ins_cost(150);
6390  format %{ "MOV    $mem,$src" %}
6391  opcode(0xC7);               /* C7 /0 */
6392  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32( src ));
6393  ins_pipe( ialu_mem_imm );
6394%}
6395
6396// Store Byte Immediate
6397instruct storeImmB(memory mem, immI8 src) %{
6398  match(Set mem (StoreB mem src));
6399
6400  ins_cost(150);
6401  format %{ "MOV8   $mem,$src" %}
6402  opcode(0xC6);               /* C6 /0 */
6403  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6404  ins_pipe( ialu_mem_imm );
6405%}
6406
6407// Store CMS card-mark Immediate
6408instruct storeImmCM(memory mem, immI8 src) %{
6409  match(Set mem (StoreCM mem src));
6410
6411  ins_cost(150);
6412  format %{ "MOV8   $mem,$src\t! CMS card-mark imm0" %}
6413  opcode(0xC6);               /* C6 /0 */
6414  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con8or32( src ));
6415  ins_pipe( ialu_mem_imm );
6416%}
6417
6418// Store Double
6419instruct storeDPR( memory mem, regDPR1 src) %{
6420  predicate(UseSSE<=1);
6421  match(Set mem (StoreD mem src));
6422
6423  ins_cost(100);
6424  format %{ "FST_D  $mem,$src" %}
6425  opcode(0xDD);       /* DD /2 */
6426  ins_encode( enc_FPR_store(mem,src) );
6427  ins_pipe( fpu_mem_reg );
6428%}
6429
6430// Store double does rounding on x86
6431instruct storeDPR_rounded( memory mem, regDPR1 src) %{
6432  predicate(UseSSE<=1);
6433  match(Set mem (StoreD mem (RoundDouble src)));
6434
6435  ins_cost(100);
6436  format %{ "FST_D  $mem,$src\t# round" %}
6437  opcode(0xDD);       /* DD /2 */
6438  ins_encode( enc_FPR_store(mem,src) );
6439  ins_pipe( fpu_mem_reg );
6440%}
6441
6442// Store XMM register to memory (double-precision floating points)
6443// MOVSD instruction
6444instruct storeD(memory mem, regD src) %{
6445  predicate(UseSSE>=2);
6446  match(Set mem (StoreD mem src));
6447  ins_cost(95);
6448  format %{ "MOVSD  $mem,$src" %}
6449  ins_encode %{
6450    __ movdbl($mem$$Address, $src$$XMMRegister);
6451  %}
6452  ins_pipe( pipe_slow );
6453%}
6454
6455// Store XMM register to memory (single-precision floating point)
6456// MOVSS instruction
6457instruct storeF(memory mem, regF src) %{
6458  predicate(UseSSE>=1);
6459  match(Set mem (StoreF mem src));
6460  ins_cost(95);
6461  format %{ "MOVSS  $mem,$src" %}
6462  ins_encode %{
6463    __ movflt($mem$$Address, $src$$XMMRegister);
6464  %}
6465  ins_pipe( pipe_slow );
6466%}
6467
6468// Store Float
6469instruct storeFPR( memory mem, regFPR1 src) %{
6470  predicate(UseSSE==0);
6471  match(Set mem (StoreF mem src));
6472
6473  ins_cost(100);
6474  format %{ "FST_S  $mem,$src" %}
6475  opcode(0xD9);       /* D9 /2 */
6476  ins_encode( enc_FPR_store(mem,src) );
6477  ins_pipe( fpu_mem_reg );
6478%}
6479
6480// Store Float does rounding on x86
6481instruct storeFPR_rounded( memory mem, regFPR1 src) %{
6482  predicate(UseSSE==0);
6483  match(Set mem (StoreF mem (RoundFloat src)));
6484
6485  ins_cost(100);
6486  format %{ "FST_S  $mem,$src\t# round" %}
6487  opcode(0xD9);       /* D9 /2 */
6488  ins_encode( enc_FPR_store(mem,src) );
6489  ins_pipe( fpu_mem_reg );
6490%}
6491
6492// Store Float does rounding on x86
6493instruct storeFPR_Drounded( memory mem, regDPR1 src) %{
6494  predicate(UseSSE<=1);
6495  match(Set mem (StoreF mem (ConvD2F src)));
6496
6497  ins_cost(100);
6498  format %{ "FST_S  $mem,$src\t# D-round" %}
6499  opcode(0xD9);       /* D9 /2 */
6500  ins_encode( enc_FPR_store(mem,src) );
6501  ins_pipe( fpu_mem_reg );
6502%}
6503
6504// Store immediate Float value (it is faster than store from FPU register)
6505// The instruction usage is guarded by predicate in operand immFPR().
6506instruct storeFPR_imm( memory mem, immFPR src) %{
6507  match(Set mem (StoreF mem src));
6508
6509  ins_cost(50);
6510  format %{ "MOV    $mem,$src\t# store float" %}
6511  opcode(0xC7);               /* C7 /0 */
6512  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32FPR_as_bits( src ));
6513  ins_pipe( ialu_mem_imm );
6514%}
6515
6516// Store immediate Float value (it is faster than store from XMM register)
6517// The instruction usage is guarded by predicate in operand immF().
6518instruct storeF_imm( memory mem, immF src) %{
6519  match(Set mem (StoreF mem src));
6520
6521  ins_cost(50);
6522  format %{ "MOV    $mem,$src\t# store float" %}
6523  opcode(0xC7);               /* C7 /0 */
6524  ins_encode( OpcP, RMopc_Mem(0x00,mem),  Con32F_as_bits( src ));
6525  ins_pipe( ialu_mem_imm );
6526%}
6527
6528// Store Integer to stack slot
6529instruct storeSSI(stackSlotI dst, rRegI src) %{
6530  match(Set dst src);
6531
6532  ins_cost(100);
6533  format %{ "MOV    $dst,$src" %}
6534  opcode(0x89);
6535  ins_encode( OpcPRegSS( dst, src ) );
6536  ins_pipe( ialu_mem_reg );
6537%}
6538
6539// Store Integer to stack slot
6540instruct storeSSP(stackSlotP dst, eRegP src) %{
6541  match(Set dst src);
6542
6543  ins_cost(100);
6544  format %{ "MOV    $dst,$src" %}
6545  opcode(0x89);
6546  ins_encode( OpcPRegSS( dst, src ) );
6547  ins_pipe( ialu_mem_reg );
6548%}
6549
6550// Store Long to stack slot
6551instruct storeSSL(stackSlotL dst, eRegL src) %{
6552  match(Set dst src);
6553
6554  ins_cost(200);
6555  format %{ "MOV    $dst,$src.lo\n\t"
6556            "MOV    $dst+4,$src.hi" %}
6557  opcode(0x89, 0x89);
6558  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
6559  ins_pipe( ialu_mem_long_reg );
6560%}
6561
6562//----------MemBar Instructions-----------------------------------------------
6563// Memory barrier flavors
6564
6565instruct membar_acquire() %{
6566  match(MemBarAcquire);
6567  match(LoadFence);
6568  ins_cost(400);
6569
6570  size(0);
6571  format %{ "MEMBAR-acquire ! (empty encoding)" %}
6572  ins_encode();
6573  ins_pipe(empty);
6574%}
6575
6576instruct membar_acquire_lock() %{
6577  match(MemBarAcquireLock);
6578  ins_cost(0);
6579
6580  size(0);
6581  format %{ "MEMBAR-acquire (prior CMPXCHG in FastLock so empty encoding)" %}
6582  ins_encode( );
6583  ins_pipe(empty);
6584%}
6585
6586instruct membar_release() %{
6587  match(MemBarRelease);
6588  match(StoreFence);
6589  ins_cost(400);
6590
6591  size(0);
6592  format %{ "MEMBAR-release ! (empty encoding)" %}
6593  ins_encode( );
6594  ins_pipe(empty);
6595%}
6596
6597instruct membar_release_lock() %{
6598  match(MemBarReleaseLock);
6599  ins_cost(0);
6600
6601  size(0);
6602  format %{ "MEMBAR-release (a FastUnlock follows so empty encoding)" %}
6603  ins_encode( );
6604  ins_pipe(empty);
6605%}
6606
6607instruct membar_volatile(eFlagsReg cr) %{
6608  match(MemBarVolatile);
6609  effect(KILL cr);
6610  ins_cost(400);
6611
6612  format %{
6613    $$template
6614    if (os::is_MP()) {
6615      $$emit$$"LOCK ADDL [ESP + #0], 0\t! membar_volatile"
6616    } else {
6617      $$emit$$"MEMBAR-volatile ! (empty encoding)"
6618    }
6619  %}
6620  ins_encode %{
6621    __ membar(Assembler::StoreLoad);
6622  %}
6623  ins_pipe(pipe_slow);
6624%}
6625
6626instruct unnecessary_membar_volatile() %{
6627  match(MemBarVolatile);
6628  predicate(Matcher::post_store_load_barrier(n));
6629  ins_cost(0);
6630
6631  size(0);
6632  format %{ "MEMBAR-volatile (unnecessary so empty encoding)" %}
6633  ins_encode( );
6634  ins_pipe(empty);
6635%}
6636
6637instruct membar_storestore() %{
6638  match(MemBarStoreStore);
6639  ins_cost(0);
6640
6641  size(0);
6642  format %{ "MEMBAR-storestore (empty encoding)" %}
6643  ins_encode( );
6644  ins_pipe(empty);
6645%}
6646
6647//----------Move Instructions--------------------------------------------------
6648instruct castX2P(eAXRegP dst, eAXRegI src) %{
6649  match(Set dst (CastX2P src));
6650  format %{ "# X2P  $dst, $src" %}
6651  ins_encode( /*empty encoding*/ );
6652  ins_cost(0);
6653  ins_pipe(empty);
6654%}
6655
6656instruct castP2X(rRegI dst, eRegP src ) %{
6657  match(Set dst (CastP2X src));
6658  ins_cost(50);
6659  format %{ "MOV    $dst, $src\t# CastP2X" %}
6660  ins_encode( enc_Copy( dst, src) );
6661  ins_pipe( ialu_reg_reg );
6662%}
6663
6664//----------Conditional Move---------------------------------------------------
6665// Conditional move
6666instruct jmovI_reg(cmpOp cop, eFlagsReg cr, rRegI dst, rRegI src) %{
6667  predicate(!VM_Version::supports_cmov() );
6668  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6669  ins_cost(200);
6670  format %{ "J$cop,us skip\t# signed cmove\n\t"
6671            "MOV    $dst,$src\n"
6672      "skip:" %}
6673  ins_encode %{
6674    Label Lskip;
6675    // Invert sense of branch from sense of CMOV
6676    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6677    __ movl($dst$$Register, $src$$Register);
6678    __ bind(Lskip);
6679  %}
6680  ins_pipe( pipe_cmov_reg );
6681%}
6682
6683instruct jmovI_regU(cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src) %{
6684  predicate(!VM_Version::supports_cmov() );
6685  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6686  ins_cost(200);
6687  format %{ "J$cop,us skip\t# unsigned cmove\n\t"
6688            "MOV    $dst,$src\n"
6689      "skip:" %}
6690  ins_encode %{
6691    Label Lskip;
6692    // Invert sense of branch from sense of CMOV
6693    __ jccb((Assembler::Condition)($cop$$cmpcode^1), Lskip);
6694    __ movl($dst$$Register, $src$$Register);
6695    __ bind(Lskip);
6696  %}
6697  ins_pipe( pipe_cmov_reg );
6698%}
6699
6700instruct cmovI_reg(rRegI dst, rRegI src, eFlagsReg cr, cmpOp cop ) %{
6701  predicate(VM_Version::supports_cmov() );
6702  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6703  ins_cost(200);
6704  format %{ "CMOV$cop $dst,$src" %}
6705  opcode(0x0F,0x40);
6706  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6707  ins_pipe( pipe_cmov_reg );
6708%}
6709
6710instruct cmovI_regU( cmpOpU cop, eFlagsRegU cr, rRegI dst, rRegI src ) %{
6711  predicate(VM_Version::supports_cmov() );
6712  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6713  ins_cost(200);
6714  format %{ "CMOV$cop $dst,$src" %}
6715  opcode(0x0F,0x40);
6716  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6717  ins_pipe( pipe_cmov_reg );
6718%}
6719
6720instruct cmovI_regUCF( cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, rRegI src ) %{
6721  predicate(VM_Version::supports_cmov() );
6722  match(Set dst (CMoveI (Binary cop cr) (Binary dst src)));
6723  ins_cost(200);
6724  expand %{
6725    cmovI_regU(cop, cr, dst, src);
6726  %}
6727%}
6728
6729// Conditional move
6730instruct cmovI_mem(cmpOp cop, eFlagsReg cr, rRegI dst, memory src) %{
6731  predicate(VM_Version::supports_cmov() );
6732  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6733  ins_cost(250);
6734  format %{ "CMOV$cop $dst,$src" %}
6735  opcode(0x0F,0x40);
6736  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6737  ins_pipe( pipe_cmov_mem );
6738%}
6739
6740// Conditional move
6741instruct cmovI_memU(cmpOpU cop, eFlagsRegU cr, rRegI dst, memory src) %{
6742  predicate(VM_Version::supports_cmov() );
6743  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6744  ins_cost(250);
6745  format %{ "CMOV$cop $dst,$src" %}
6746  opcode(0x0F,0x40);
6747  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6748  ins_pipe( pipe_cmov_mem );
6749%}
6750
6751instruct cmovI_memUCF(cmpOpUCF cop, eFlagsRegUCF cr, rRegI dst, memory src) %{
6752  predicate(VM_Version::supports_cmov() );
6753  match(Set dst (CMoveI (Binary cop cr) (Binary dst (LoadI src))));
6754  ins_cost(250);
6755  expand %{
6756    cmovI_memU(cop, cr, dst, src);
6757  %}
6758%}
6759
6760// Conditional move
6761instruct cmovP_reg(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6762  predicate(VM_Version::supports_cmov() );
6763  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6764  ins_cost(200);
6765  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6766  opcode(0x0F,0x40);
6767  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6768  ins_pipe( pipe_cmov_reg );
6769%}
6770
6771// Conditional move (non-P6 version)
6772// Note:  a CMoveP is generated for  stubs and native wrappers
6773//        regardless of whether we are on a P6, so we
6774//        emulate a cmov here
6775instruct cmovP_reg_nonP6(eRegP dst, eRegP src, eFlagsReg cr, cmpOp cop ) %{
6776  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6777  ins_cost(300);
6778  format %{ "Jn$cop   skip\n\t"
6779          "MOV    $dst,$src\t# pointer\n"
6780      "skip:" %}
6781  opcode(0x8b);
6782  ins_encode( enc_cmov_branch(cop, 0x2), OpcP, RegReg(dst, src));
6783  ins_pipe( pipe_cmov_reg );
6784%}
6785
6786// Conditional move
6787instruct cmovP_regU(cmpOpU cop, eFlagsRegU cr, eRegP dst, eRegP src ) %{
6788  predicate(VM_Version::supports_cmov() );
6789  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6790  ins_cost(200);
6791  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6792  opcode(0x0F,0x40);
6793  ins_encode( enc_cmov(cop), RegReg( dst, src ) );
6794  ins_pipe( pipe_cmov_reg );
6795%}
6796
6797instruct cmovP_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegP dst, eRegP src ) %{
6798  predicate(VM_Version::supports_cmov() );
6799  match(Set dst (CMoveP (Binary cop cr) (Binary dst src)));
6800  ins_cost(200);
6801  expand %{
6802    cmovP_regU(cop, cr, dst, src);
6803  %}
6804%}
6805
6806// DISABLED: Requires the ADLC to emit a bottom_type call that
6807// correctly meets the two pointer arguments; one is an incoming
6808// register but the other is a memory operand.  ALSO appears to
6809// be buggy with implicit null checks.
6810//
6811//// Conditional move
6812//instruct cmovP_mem(cmpOp cop, eFlagsReg cr, eRegP dst, memory src) %{
6813//  predicate(VM_Version::supports_cmov() );
6814//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6815//  ins_cost(250);
6816//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6817//  opcode(0x0F,0x40);
6818//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6819//  ins_pipe( pipe_cmov_mem );
6820//%}
6821//
6822//// Conditional move
6823//instruct cmovP_memU(cmpOpU cop, eFlagsRegU cr, eRegP dst, memory src) %{
6824//  predicate(VM_Version::supports_cmov() );
6825//  match(Set dst (CMoveP (Binary cop cr) (Binary dst (LoadP src))));
6826//  ins_cost(250);
6827//  format %{ "CMOV$cop $dst,$src\t# ptr" %}
6828//  opcode(0x0F,0x40);
6829//  ins_encode( enc_cmov(cop), RegMem( dst, src ) );
6830//  ins_pipe( pipe_cmov_mem );
6831//%}
6832
6833// Conditional move
6834instruct fcmovDPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regDPR1 dst, regDPR src) %{
6835  predicate(UseSSE<=1);
6836  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6837  ins_cost(200);
6838  format %{ "FCMOV$cop $dst,$src\t# double" %}
6839  opcode(0xDA);
6840  ins_encode( enc_cmov_dpr(cop,src) );
6841  ins_pipe( pipe_cmovDPR_reg );
6842%}
6843
6844// Conditional move
6845instruct fcmovFPR_regU(cmpOp_fcmov cop, eFlagsRegU cr, regFPR1 dst, regFPR src) %{
6846  predicate(UseSSE==0);
6847  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6848  ins_cost(200);
6849  format %{ "FCMOV$cop $dst,$src\t# float" %}
6850  opcode(0xDA);
6851  ins_encode( enc_cmov_dpr(cop,src) );
6852  ins_pipe( pipe_cmovDPR_reg );
6853%}
6854
6855// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6856instruct fcmovDPR_regS(cmpOp cop, eFlagsReg cr, regDPR dst, regDPR src) %{
6857  predicate(UseSSE<=1);
6858  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6859  ins_cost(200);
6860  format %{ "Jn$cop   skip\n\t"
6861            "MOV    $dst,$src\t# double\n"
6862      "skip:" %}
6863  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6864  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_DPR(src), OpcP, RegOpc(dst) );
6865  ins_pipe( pipe_cmovDPR_reg );
6866%}
6867
6868// Float CMOV on Intel doesn't handle *signed* compares, only unsigned.
6869instruct fcmovFPR_regS(cmpOp cop, eFlagsReg cr, regFPR dst, regFPR src) %{
6870  predicate(UseSSE==0);
6871  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6872  ins_cost(200);
6873  format %{ "Jn$cop    skip\n\t"
6874            "MOV    $dst,$src\t# float\n"
6875      "skip:" %}
6876  opcode (0xdd, 0x3);     /* DD D8+i or DD /3 */
6877  ins_encode( enc_cmov_branch( cop, 0x4 ), Push_Reg_FPR(src), OpcP, RegOpc(dst) );
6878  ins_pipe( pipe_cmovDPR_reg );
6879%}
6880
6881// No CMOVE with SSE/SSE2
6882instruct fcmovF_regS(cmpOp cop, eFlagsReg cr, regF dst, regF src) %{
6883  predicate (UseSSE>=1);
6884  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6885  ins_cost(200);
6886  format %{ "Jn$cop   skip\n\t"
6887            "MOVSS  $dst,$src\t# float\n"
6888      "skip:" %}
6889  ins_encode %{
6890    Label skip;
6891    // Invert sense of branch from sense of CMOV
6892    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6893    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6894    __ bind(skip);
6895  %}
6896  ins_pipe( pipe_slow );
6897%}
6898
6899// No CMOVE with SSE/SSE2
6900instruct fcmovD_regS(cmpOp cop, eFlagsReg cr, regD dst, regD src) %{
6901  predicate (UseSSE>=2);
6902  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6903  ins_cost(200);
6904  format %{ "Jn$cop   skip\n\t"
6905            "MOVSD  $dst,$src\t# float\n"
6906      "skip:" %}
6907  ins_encode %{
6908    Label skip;
6909    // Invert sense of branch from sense of CMOV
6910    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6911    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6912    __ bind(skip);
6913  %}
6914  ins_pipe( pipe_slow );
6915%}
6916
6917// unsigned version
6918instruct fcmovF_regU(cmpOpU cop, eFlagsRegU cr, regF dst, regF src) %{
6919  predicate (UseSSE>=1);
6920  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6921  ins_cost(200);
6922  format %{ "Jn$cop   skip\n\t"
6923            "MOVSS  $dst,$src\t# float\n"
6924      "skip:" %}
6925  ins_encode %{
6926    Label skip;
6927    // Invert sense of branch from sense of CMOV
6928    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6929    __ movflt($dst$$XMMRegister, $src$$XMMRegister);
6930    __ bind(skip);
6931  %}
6932  ins_pipe( pipe_slow );
6933%}
6934
6935instruct fcmovF_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regF dst, regF src) %{
6936  predicate (UseSSE>=1);
6937  match(Set dst (CMoveF (Binary cop cr) (Binary dst src)));
6938  ins_cost(200);
6939  expand %{
6940    fcmovF_regU(cop, cr, dst, src);
6941  %}
6942%}
6943
6944// unsigned version
6945instruct fcmovD_regU(cmpOpU cop, eFlagsRegU cr, regD dst, regD src) %{
6946  predicate (UseSSE>=2);
6947  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6948  ins_cost(200);
6949  format %{ "Jn$cop   skip\n\t"
6950            "MOVSD  $dst,$src\t# float\n"
6951      "skip:" %}
6952  ins_encode %{
6953    Label skip;
6954    // Invert sense of branch from sense of CMOV
6955    __ jccb((Assembler::Condition)($cop$$cmpcode^1), skip);
6956    __ movdbl($dst$$XMMRegister, $src$$XMMRegister);
6957    __ bind(skip);
6958  %}
6959  ins_pipe( pipe_slow );
6960%}
6961
6962instruct fcmovD_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, regD dst, regD src) %{
6963  predicate (UseSSE>=2);
6964  match(Set dst (CMoveD (Binary cop cr) (Binary dst src)));
6965  ins_cost(200);
6966  expand %{
6967    fcmovD_regU(cop, cr, dst, src);
6968  %}
6969%}
6970
6971instruct cmovL_reg(cmpOp cop, eFlagsReg cr, eRegL dst, eRegL src) %{
6972  predicate(VM_Version::supports_cmov() );
6973  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6974  ins_cost(200);
6975  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6976            "CMOV$cop $dst.hi,$src.hi" %}
6977  opcode(0x0F,0x40);
6978  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6979  ins_pipe( pipe_cmov_reg_long );
6980%}
6981
6982instruct cmovL_regU(cmpOpU cop, eFlagsRegU cr, eRegL dst, eRegL src) %{
6983  predicate(VM_Version::supports_cmov() );
6984  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6985  ins_cost(200);
6986  format %{ "CMOV$cop $dst.lo,$src.lo\n\t"
6987            "CMOV$cop $dst.hi,$src.hi" %}
6988  opcode(0x0F,0x40);
6989  ins_encode( enc_cmov(cop), RegReg_Lo2( dst, src ), enc_cmov(cop), RegReg_Hi2( dst, src ) );
6990  ins_pipe( pipe_cmov_reg_long );
6991%}
6992
6993instruct cmovL_regUCF(cmpOpUCF cop, eFlagsRegUCF cr, eRegL dst, eRegL src) %{
6994  predicate(VM_Version::supports_cmov() );
6995  match(Set dst (CMoveL (Binary cop cr) (Binary dst src)));
6996  ins_cost(200);
6997  expand %{
6998    cmovL_regU(cop, cr, dst, src);
6999  %}
7000%}
7001
7002//----------Arithmetic Instructions--------------------------------------------
7003//----------Addition Instructions----------------------------------------------
7004
7005// Integer Addition Instructions
7006instruct addI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7007  match(Set dst (AddI dst src));
7008  effect(KILL cr);
7009
7010  size(2);
7011  format %{ "ADD    $dst,$src" %}
7012  opcode(0x03);
7013  ins_encode( OpcP, RegReg( dst, src) );
7014  ins_pipe( ialu_reg_reg );
7015%}
7016
7017instruct addI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7018  match(Set dst (AddI dst src));
7019  effect(KILL cr);
7020
7021  format %{ "ADD    $dst,$src" %}
7022  opcode(0x81, 0x00); /* /0 id */
7023  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7024  ins_pipe( ialu_reg );
7025%}
7026
7027instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
7028  predicate(UseIncDec);
7029  match(Set dst (AddI dst src));
7030  effect(KILL cr);
7031
7032  size(1);
7033  format %{ "INC    $dst" %}
7034  opcode(0x40); /*  */
7035  ins_encode( Opc_plus( primary, dst ) );
7036  ins_pipe( ialu_reg );
7037%}
7038
7039instruct leaI_eReg_immI(rRegI dst, rRegI src0, immI src1) %{
7040  match(Set dst (AddI src0 src1));
7041  ins_cost(110);
7042
7043  format %{ "LEA    $dst,[$src0 + $src1]" %}
7044  opcode(0x8D); /* 0x8D /r */
7045  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7046  ins_pipe( ialu_reg_reg );
7047%}
7048
7049instruct leaP_eReg_immI(eRegP dst, eRegP src0, immI src1) %{
7050  match(Set dst (AddP src0 src1));
7051  ins_cost(110);
7052
7053  format %{ "LEA    $dst,[$src0 + $src1]\t# ptr" %}
7054  opcode(0x8D); /* 0x8D /r */
7055  ins_encode( OpcP, RegLea( dst, src0, src1 ) );
7056  ins_pipe( ialu_reg_reg );
7057%}
7058
7059instruct decI_eReg(rRegI dst, immI_M1 src, eFlagsReg cr) %{
7060  predicate(UseIncDec);
7061  match(Set dst (AddI dst src));
7062  effect(KILL cr);
7063
7064  size(1);
7065  format %{ "DEC    $dst" %}
7066  opcode(0x48); /*  */
7067  ins_encode( Opc_plus( primary, dst ) );
7068  ins_pipe( ialu_reg );
7069%}
7070
7071instruct addP_eReg(eRegP dst, rRegI src, eFlagsReg cr) %{
7072  match(Set dst (AddP dst src));
7073  effect(KILL cr);
7074
7075  size(2);
7076  format %{ "ADD    $dst,$src" %}
7077  opcode(0x03);
7078  ins_encode( OpcP, RegReg( dst, src) );
7079  ins_pipe( ialu_reg_reg );
7080%}
7081
7082instruct addP_eReg_imm(eRegP dst, immI src, eFlagsReg cr) %{
7083  match(Set dst (AddP dst src));
7084  effect(KILL cr);
7085
7086  format %{ "ADD    $dst,$src" %}
7087  opcode(0x81,0x00); /* Opcode 81 /0 id */
7088  // ins_encode( RegImm( dst, src) );
7089  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7090  ins_pipe( ialu_reg );
7091%}
7092
7093instruct addI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7094  match(Set dst (AddI dst (LoadI src)));
7095  effect(KILL cr);
7096
7097  ins_cost(125);
7098  format %{ "ADD    $dst,$src" %}
7099  opcode(0x03);
7100  ins_encode( OpcP, RegMem( dst, src) );
7101  ins_pipe( ialu_reg_mem );
7102%}
7103
7104instruct addI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7105  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7106  effect(KILL cr);
7107
7108  ins_cost(150);
7109  format %{ "ADD    $dst,$src" %}
7110  opcode(0x01);  /* Opcode 01 /r */
7111  ins_encode( OpcP, RegMem( src, dst ) );
7112  ins_pipe( ialu_mem_reg );
7113%}
7114
7115// Add Memory with Immediate
7116instruct addI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
7117  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7118  effect(KILL cr);
7119
7120  ins_cost(125);
7121  format %{ "ADD    $dst,$src" %}
7122  opcode(0x81);               /* Opcode 81 /0 id */
7123  ins_encode( OpcSE( src ), RMopc_Mem(0x00,dst), Con8or32( src ) );
7124  ins_pipe( ialu_mem_imm );
7125%}
7126
7127instruct incI_mem(memory dst, immI1 src, eFlagsReg cr) %{
7128  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7129  effect(KILL cr);
7130
7131  ins_cost(125);
7132  format %{ "INC    $dst" %}
7133  opcode(0xFF);               /* Opcode FF /0 */
7134  ins_encode( OpcP, RMopc_Mem(0x00,dst));
7135  ins_pipe( ialu_mem_imm );
7136%}
7137
7138instruct decI_mem(memory dst, immI_M1 src, eFlagsReg cr) %{
7139  match(Set dst (StoreI dst (AddI (LoadI dst) src)));
7140  effect(KILL cr);
7141
7142  ins_cost(125);
7143  format %{ "DEC    $dst" %}
7144  opcode(0xFF);               /* Opcode FF /1 */
7145  ins_encode( OpcP, RMopc_Mem(0x01,dst));
7146  ins_pipe( ialu_mem_imm );
7147%}
7148
7149
7150instruct checkCastPP( eRegP dst ) %{
7151  match(Set dst (CheckCastPP dst));
7152
7153  size(0);
7154  format %{ "#checkcastPP of $dst" %}
7155  ins_encode( /*empty encoding*/ );
7156  ins_pipe( empty );
7157%}
7158
7159instruct castPP( eRegP dst ) %{
7160  match(Set dst (CastPP dst));
7161  format %{ "#castPP of $dst" %}
7162  ins_encode( /*empty encoding*/ );
7163  ins_pipe( empty );
7164%}
7165
7166instruct castII( rRegI dst ) %{
7167  match(Set dst (CastII dst));
7168  format %{ "#castII of $dst" %}
7169  ins_encode( /*empty encoding*/ );
7170  ins_cost(0);
7171  ins_pipe( empty );
7172%}
7173
7174
7175// Load-locked - same as a regular pointer load when used with compare-swap
7176instruct loadPLocked(eRegP dst, memory mem) %{
7177  match(Set dst (LoadPLocked mem));
7178
7179  ins_cost(125);
7180  format %{ "MOV    $dst,$mem\t# Load ptr. locked" %}
7181  opcode(0x8B);
7182  ins_encode( OpcP, RegMem(dst,mem));
7183  ins_pipe( ialu_reg_mem );
7184%}
7185
7186// Conditional-store of the updated heap-top.
7187// Used during allocation of the shared heap.
7188// Sets flags (EQ) on success.  Implemented with a CMPXCHG on Intel.
7189instruct storePConditional( memory heap_top_ptr, eAXRegP oldval, eRegP newval, eFlagsReg cr ) %{
7190  match(Set cr (StorePConditional heap_top_ptr (Binary oldval newval)));
7191  // EAX is killed if there is contention, but then it's also unused.
7192  // In the common case of no contention, EAX holds the new oop address.
7193  format %{ "CMPXCHG $heap_top_ptr,$newval\t# If EAX==$heap_top_ptr Then store $newval into $heap_top_ptr" %}
7194  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval,heap_top_ptr) );
7195  ins_pipe( pipe_cmpxchg );
7196%}
7197
7198// Conditional-store of an int value.
7199// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG on Intel.
7200instruct storeIConditional( memory mem, eAXRegI oldval, rRegI newval, eFlagsReg cr ) %{
7201  match(Set cr (StoreIConditional mem (Binary oldval newval)));
7202  effect(KILL oldval);
7203  format %{ "CMPXCHG $mem,$newval\t# If EAX==$mem Then store $newval into $mem" %}
7204  ins_encode( lock_prefix, Opcode(0x0F), Opcode(0xB1), RegMem(newval, mem) );
7205  ins_pipe( pipe_cmpxchg );
7206%}
7207
7208// Conditional-store of a long value.
7209// ZF flag is set on success, reset otherwise.  Implemented with a CMPXCHG8 on Intel.
7210instruct storeLConditional( memory mem, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7211  match(Set cr (StoreLConditional mem (Binary oldval newval)));
7212  effect(KILL oldval);
7213  format %{ "XCHG   EBX,ECX\t# correct order for CMPXCHG8 instruction\n\t"
7214            "CMPXCHG8 $mem,ECX:EBX\t# If EDX:EAX==$mem Then store ECX:EBX into $mem\n\t"
7215            "XCHG   EBX,ECX"
7216  %}
7217  ins_encode %{
7218    // Note: we need to swap rbx, and rcx before and after the
7219    //       cmpxchg8 instruction because the instruction uses
7220    //       rcx as the high order word of the new value to store but
7221    //       our register encoding uses rbx.
7222    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7223    if( os::is_MP() )
7224      __ lock();
7225    __ cmpxchg8($mem$$Address);
7226    __ xchgl(as_Register(EBX_enc), as_Register(ECX_enc));
7227  %}
7228  ins_pipe( pipe_cmpxchg );
7229%}
7230
7231// No flag versions for CompareAndSwap{P,I,L} because matcher can't match them
7232
7233instruct compareAndSwapL( rRegI res, eSIRegP mem_ptr, eADXRegL oldval, eBCXRegL newval, eFlagsReg cr ) %{
7234  predicate(VM_Version::supports_cx8());
7235  match(Set res (CompareAndSwapL mem_ptr (Binary oldval newval)));
7236  effect(KILL cr, KILL oldval);
7237  format %{ "CMPXCHG8 [$mem_ptr],$newval\t# If EDX:EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7238            "MOV    $res,0\n\t"
7239            "JNE,s  fail\n\t"
7240            "MOV    $res,1\n"
7241          "fail:" %}
7242  ins_encode( enc_cmpxchg8(mem_ptr),
7243              enc_flags_ne_to_boolean(res) );
7244  ins_pipe( pipe_cmpxchg );
7245%}
7246
7247instruct compareAndSwapP( rRegI res,  pRegP mem_ptr, eAXRegP oldval, eCXRegP newval, eFlagsReg cr) %{
7248  match(Set res (CompareAndSwapP mem_ptr (Binary oldval newval)));
7249  effect(KILL cr, KILL oldval);
7250  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7251            "MOV    $res,0\n\t"
7252            "JNE,s  fail\n\t"
7253            "MOV    $res,1\n"
7254          "fail:" %}
7255  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7256  ins_pipe( pipe_cmpxchg );
7257%}
7258
7259instruct compareAndSwapI( rRegI res, pRegP mem_ptr, eAXRegI oldval, eCXRegI newval, eFlagsReg cr) %{
7260  match(Set res (CompareAndSwapI mem_ptr (Binary oldval newval)));
7261  effect(KILL cr, KILL oldval);
7262  format %{ "CMPXCHG [$mem_ptr],$newval\t# If EAX==[$mem_ptr] Then store $newval into [$mem_ptr]\n\t"
7263            "MOV    $res,0\n\t"
7264            "JNE,s  fail\n\t"
7265            "MOV    $res,1\n"
7266          "fail:" %}
7267  ins_encode( enc_cmpxchg(mem_ptr), enc_flags_ne_to_boolean(res) );
7268  ins_pipe( pipe_cmpxchg );
7269%}
7270
7271instruct xaddI_no_res( memory mem, Universe dummy, immI add, eFlagsReg cr) %{
7272  predicate(n->as_LoadStore()->result_not_used());
7273  match(Set dummy (GetAndAddI mem add));
7274  effect(KILL cr);
7275  format %{ "ADDL  [$mem],$add" %}
7276  ins_encode %{
7277    if (os::is_MP()) { __ lock(); }
7278    __ addl($mem$$Address, $add$$constant);
7279  %}
7280  ins_pipe( pipe_cmpxchg );
7281%}
7282
7283instruct xaddI( memory mem, rRegI newval, eFlagsReg cr) %{
7284  match(Set newval (GetAndAddI mem newval));
7285  effect(KILL cr);
7286  format %{ "XADDL  [$mem],$newval" %}
7287  ins_encode %{
7288    if (os::is_MP()) { __ lock(); }
7289    __ xaddl($mem$$Address, $newval$$Register);
7290  %}
7291  ins_pipe( pipe_cmpxchg );
7292%}
7293
7294instruct xchgI( memory mem, rRegI newval) %{
7295  match(Set newval (GetAndSetI mem newval));
7296  format %{ "XCHGL  $newval,[$mem]" %}
7297  ins_encode %{
7298    __ xchgl($newval$$Register, $mem$$Address);
7299  %}
7300  ins_pipe( pipe_cmpxchg );
7301%}
7302
7303instruct xchgP( memory mem, pRegP newval) %{
7304  match(Set newval (GetAndSetP mem newval));
7305  format %{ "XCHGL  $newval,[$mem]" %}
7306  ins_encode %{
7307    __ xchgl($newval$$Register, $mem$$Address);
7308  %}
7309  ins_pipe( pipe_cmpxchg );
7310%}
7311
7312//----------Subtraction Instructions-------------------------------------------
7313
7314// Integer Subtraction Instructions
7315instruct subI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7316  match(Set dst (SubI dst src));
7317  effect(KILL cr);
7318
7319  size(2);
7320  format %{ "SUB    $dst,$src" %}
7321  opcode(0x2B);
7322  ins_encode( OpcP, RegReg( dst, src) );
7323  ins_pipe( ialu_reg_reg );
7324%}
7325
7326instruct subI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7327  match(Set dst (SubI dst src));
7328  effect(KILL cr);
7329
7330  format %{ "SUB    $dst,$src" %}
7331  opcode(0x81,0x05);  /* Opcode 81 /5 */
7332  // ins_encode( RegImm( dst, src) );
7333  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7334  ins_pipe( ialu_reg );
7335%}
7336
7337instruct subI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
7338  match(Set dst (SubI dst (LoadI src)));
7339  effect(KILL cr);
7340
7341  ins_cost(125);
7342  format %{ "SUB    $dst,$src" %}
7343  opcode(0x2B);
7344  ins_encode( OpcP, RegMem( dst, src) );
7345  ins_pipe( ialu_reg_mem );
7346%}
7347
7348instruct subI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
7349  match(Set dst (StoreI dst (SubI (LoadI dst) src)));
7350  effect(KILL cr);
7351
7352  ins_cost(150);
7353  format %{ "SUB    $dst,$src" %}
7354  opcode(0x29);  /* Opcode 29 /r */
7355  ins_encode( OpcP, RegMem( src, dst ) );
7356  ins_pipe( ialu_mem_reg );
7357%}
7358
7359// Subtract from a pointer
7360instruct subP_eReg(eRegP dst, rRegI src, immI0 zero, eFlagsReg cr) %{
7361  match(Set dst (AddP dst (SubI zero src)));
7362  effect(KILL cr);
7363
7364  size(2);
7365  format %{ "SUB    $dst,$src" %}
7366  opcode(0x2B);
7367  ins_encode( OpcP, RegReg( dst, src) );
7368  ins_pipe( ialu_reg_reg );
7369%}
7370
7371instruct negI_eReg(rRegI dst, immI0 zero, eFlagsReg cr) %{
7372  match(Set dst (SubI zero dst));
7373  effect(KILL cr);
7374
7375  size(2);
7376  format %{ "NEG    $dst" %}
7377  opcode(0xF7,0x03);  // Opcode F7 /3
7378  ins_encode( OpcP, RegOpc( dst ) );
7379  ins_pipe( ialu_reg );
7380%}
7381
7382//----------Multiplication/Division Instructions-------------------------------
7383// Integer Multiplication Instructions
7384// Multiply Register
7385instruct mulI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7386  match(Set dst (MulI dst src));
7387  effect(KILL cr);
7388
7389  size(3);
7390  ins_cost(300);
7391  format %{ "IMUL   $dst,$src" %}
7392  opcode(0xAF, 0x0F);
7393  ins_encode( OpcS, OpcP, RegReg( dst, src) );
7394  ins_pipe( ialu_reg_reg_alu0 );
7395%}
7396
7397// Multiply 32-bit Immediate
7398instruct mulI_eReg_imm(rRegI dst, rRegI src, immI imm, eFlagsReg cr) %{
7399  match(Set dst (MulI src imm));
7400  effect(KILL cr);
7401
7402  ins_cost(300);
7403  format %{ "IMUL   $dst,$src,$imm" %}
7404  opcode(0x69);  /* 69 /r id */
7405  ins_encode( OpcSE(imm), RegReg( dst, src ), Con8or32( imm ) );
7406  ins_pipe( ialu_reg_reg_alu0 );
7407%}
7408
7409instruct loadConL_low_only(eADXRegL_low_only dst, immL32 src, eFlagsReg cr) %{
7410  match(Set dst src);
7411  effect(KILL cr);
7412
7413  // Note that this is artificially increased to make it more expensive than loadConL
7414  ins_cost(250);
7415  format %{ "MOV    EAX,$src\t// low word only" %}
7416  opcode(0xB8);
7417  ins_encode( LdImmL_Lo(dst, src) );
7418  ins_pipe( ialu_reg_fat );
7419%}
7420
7421// Multiply by 32-bit Immediate, taking the shifted high order results
7422//  (special case for shift by 32)
7423instruct mulI_imm_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32 cnt, eFlagsReg cr) %{
7424  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7425  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7426             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7427             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7428  effect(USE src1, KILL cr);
7429
7430  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7431  ins_cost(0*100 + 1*400 - 150);
7432  format %{ "IMUL   EDX:EAX,$src1" %}
7433  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7434  ins_pipe( pipe_slow );
7435%}
7436
7437// Multiply by 32-bit Immediate, taking the shifted high order results
7438instruct mulI_imm_RShift_high(eDXRegI dst, nadxRegI src1, eADXRegL_low_only src2, immI_32_63 cnt, eFlagsReg cr) %{
7439  match(Set dst (ConvL2I (RShiftL (MulL (ConvI2L src1) src2) cnt)));
7440  predicate( _kids[0]->_kids[0]->_kids[1]->_leaf->Opcode() == Op_ConL &&
7441             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() >= min_jint &&
7442             _kids[0]->_kids[0]->_kids[1]->_leaf->as_Type()->type()->is_long()->get_con() <= max_jint );
7443  effect(USE src1, KILL cr);
7444
7445  // Note that this is adjusted by 150 to compensate for the overcosting of loadConL_low_only
7446  ins_cost(1*100 + 1*400 - 150);
7447  format %{ "IMUL   EDX:EAX,$src1\n\t"
7448            "SAR    EDX,$cnt-32" %}
7449  ins_encode( multiply_con_and_shift_high( dst, src1, src2, cnt, cr ) );
7450  ins_pipe( pipe_slow );
7451%}
7452
7453// Multiply Memory 32-bit Immediate
7454instruct mulI_mem_imm(rRegI dst, memory src, immI imm, eFlagsReg cr) %{
7455  match(Set dst (MulI (LoadI src) imm));
7456  effect(KILL cr);
7457
7458  ins_cost(300);
7459  format %{ "IMUL   $dst,$src,$imm" %}
7460  opcode(0x69);  /* 69 /r id */
7461  ins_encode( OpcSE(imm), RegMem( dst, src ), Con8or32( imm ) );
7462  ins_pipe( ialu_reg_mem_alu0 );
7463%}
7464
7465// Multiply Memory
7466instruct mulI(rRegI dst, memory src, eFlagsReg cr) %{
7467  match(Set dst (MulI dst (LoadI src)));
7468  effect(KILL cr);
7469
7470  ins_cost(350);
7471  format %{ "IMUL   $dst,$src" %}
7472  opcode(0xAF, 0x0F);
7473  ins_encode( OpcS, OpcP, RegMem( dst, src) );
7474  ins_pipe( ialu_reg_mem_alu0 );
7475%}
7476
7477// Multiply Register Int to Long
7478instruct mulI2L(eADXRegL dst, eAXRegI src, nadxRegI src1, eFlagsReg flags) %{
7479  // Basic Idea: long = (long)int * (long)int
7480  match(Set dst (MulL (ConvI2L src) (ConvI2L src1)));
7481  effect(DEF dst, USE src, USE src1, KILL flags);
7482
7483  ins_cost(300);
7484  format %{ "IMUL   $dst,$src1" %}
7485
7486  ins_encode( long_int_multiply( dst, src1 ) );
7487  ins_pipe( ialu_reg_reg_alu0 );
7488%}
7489
7490instruct mulIS_eReg(eADXRegL dst, immL_32bits mask, eFlagsReg flags, eAXRegI src, nadxRegI src1) %{
7491  // Basic Idea:  long = (int & 0xffffffffL) * (int & 0xffffffffL)
7492  match(Set dst (MulL (AndL (ConvI2L src) mask) (AndL (ConvI2L src1) mask)));
7493  effect(KILL flags);
7494
7495  ins_cost(300);
7496  format %{ "MUL    $dst,$src1" %}
7497
7498  ins_encode( long_uint_multiply(dst, src1) );
7499  ins_pipe( ialu_reg_reg_alu0 );
7500%}
7501
7502// Multiply Register Long
7503instruct mulL_eReg(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7504  match(Set dst (MulL dst src));
7505  effect(KILL cr, TEMP tmp);
7506  ins_cost(4*100+3*400);
7507// Basic idea: lo(result) = lo(x_lo * y_lo)
7508//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) + lo(x_lo * y_hi)
7509  format %{ "MOV    $tmp,$src.lo\n\t"
7510            "IMUL   $tmp,EDX\n\t"
7511            "MOV    EDX,$src.hi\n\t"
7512            "IMUL   EDX,EAX\n\t"
7513            "ADD    $tmp,EDX\n\t"
7514            "MUL    EDX:EAX,$src.lo\n\t"
7515            "ADD    EDX,$tmp" %}
7516  ins_encode( long_multiply( dst, src, tmp ) );
7517  ins_pipe( pipe_slow );
7518%}
7519
7520// Multiply Register Long where the left operand's high 32 bits are zero
7521instruct mulL_eReg_lhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7522  predicate(is_operand_hi32_zero(n->in(1)));
7523  match(Set dst (MulL dst src));
7524  effect(KILL cr, TEMP tmp);
7525  ins_cost(2*100+2*400);
7526// Basic idea: lo(result) = lo(x_lo * y_lo)
7527//             hi(result) = hi(x_lo * y_lo) + lo(x_lo * y_hi) where lo(x_hi * y_lo) = 0 because x_hi = 0
7528  format %{ "MOV    $tmp,$src.hi\n\t"
7529            "IMUL   $tmp,EAX\n\t"
7530            "MUL    EDX:EAX,$src.lo\n\t"
7531            "ADD    EDX,$tmp" %}
7532  ins_encode %{
7533    __ movl($tmp$$Register, HIGH_FROM_LOW($src$$Register));
7534    __ imull($tmp$$Register, rax);
7535    __ mull($src$$Register);
7536    __ addl(rdx, $tmp$$Register);
7537  %}
7538  ins_pipe( pipe_slow );
7539%}
7540
7541// Multiply Register Long where the right operand's high 32 bits are zero
7542instruct mulL_eReg_rhi0(eADXRegL dst, eRegL src, rRegI tmp, eFlagsReg cr) %{
7543  predicate(is_operand_hi32_zero(n->in(2)));
7544  match(Set dst (MulL dst src));
7545  effect(KILL cr, TEMP tmp);
7546  ins_cost(2*100+2*400);
7547// Basic idea: lo(result) = lo(x_lo * y_lo)
7548//             hi(result) = hi(x_lo * y_lo) + lo(x_hi * y_lo) where lo(x_lo * y_hi) = 0 because y_hi = 0
7549  format %{ "MOV    $tmp,$src.lo\n\t"
7550            "IMUL   $tmp,EDX\n\t"
7551            "MUL    EDX:EAX,$src.lo\n\t"
7552            "ADD    EDX,$tmp" %}
7553  ins_encode %{
7554    __ movl($tmp$$Register, $src$$Register);
7555    __ imull($tmp$$Register, rdx);
7556    __ mull($src$$Register);
7557    __ addl(rdx, $tmp$$Register);
7558  %}
7559  ins_pipe( pipe_slow );
7560%}
7561
7562// Multiply Register Long where the left and the right operands' high 32 bits are zero
7563instruct mulL_eReg_hi0(eADXRegL dst, eRegL src, eFlagsReg cr) %{
7564  predicate(is_operand_hi32_zero(n->in(1)) && is_operand_hi32_zero(n->in(2)));
7565  match(Set dst (MulL dst src));
7566  effect(KILL cr);
7567  ins_cost(1*400);
7568// Basic idea: lo(result) = lo(x_lo * y_lo)
7569//             hi(result) = hi(x_lo * y_lo) where lo(x_hi * y_lo) = 0 and lo(x_lo * y_hi) = 0 because x_hi = 0 and y_hi = 0
7570  format %{ "MUL    EDX:EAX,$src.lo\n\t" %}
7571  ins_encode %{
7572    __ mull($src$$Register);
7573  %}
7574  ins_pipe( pipe_slow );
7575%}
7576
7577// Multiply Register Long by small constant
7578instruct mulL_eReg_con(eADXRegL dst, immL_127 src, rRegI tmp, eFlagsReg cr) %{
7579  match(Set dst (MulL dst src));
7580  effect(KILL cr, TEMP tmp);
7581  ins_cost(2*100+2*400);
7582  size(12);
7583// Basic idea: lo(result) = lo(src * EAX)
7584//             hi(result) = hi(src * EAX) + lo(src * EDX)
7585  format %{ "IMUL   $tmp,EDX,$src\n\t"
7586            "MOV    EDX,$src\n\t"
7587            "MUL    EDX\t# EDX*EAX -> EDX:EAX\n\t"
7588            "ADD    EDX,$tmp" %}
7589  ins_encode( long_multiply_con( dst, src, tmp ) );
7590  ins_pipe( pipe_slow );
7591%}
7592
7593// Integer DIV with Register
7594instruct divI_eReg(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7595  match(Set rax (DivI rax div));
7596  effect(KILL rdx, KILL cr);
7597  size(26);
7598  ins_cost(30*100+10*100);
7599  format %{ "CMP    EAX,0x80000000\n\t"
7600            "JNE,s  normal\n\t"
7601            "XOR    EDX,EDX\n\t"
7602            "CMP    ECX,-1\n\t"
7603            "JE,s   done\n"
7604    "normal: CDQ\n\t"
7605            "IDIV   $div\n\t"
7606    "done:"        %}
7607  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7608  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7609  ins_pipe( ialu_reg_reg_alu0 );
7610%}
7611
7612// Divide Register Long
7613instruct divL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7614  match(Set dst (DivL src1 src2));
7615  effect( KILL cr, KILL cx, KILL bx );
7616  ins_cost(10000);
7617  format %{ "PUSH   $src1.hi\n\t"
7618            "PUSH   $src1.lo\n\t"
7619            "PUSH   $src2.hi\n\t"
7620            "PUSH   $src2.lo\n\t"
7621            "CALL   SharedRuntime::ldiv\n\t"
7622            "ADD    ESP,16" %}
7623  ins_encode( long_div(src1,src2) );
7624  ins_pipe( pipe_slow );
7625%}
7626
7627// Integer DIVMOD with Register, both quotient and mod results
7628instruct divModI_eReg_divmod(eAXRegI rax, eDXRegI rdx, eCXRegI div, eFlagsReg cr) %{
7629  match(DivModI rax div);
7630  effect(KILL cr);
7631  size(26);
7632  ins_cost(30*100+10*100);
7633  format %{ "CMP    EAX,0x80000000\n\t"
7634            "JNE,s  normal\n\t"
7635            "XOR    EDX,EDX\n\t"
7636            "CMP    ECX,-1\n\t"
7637            "JE,s   done\n"
7638    "normal: CDQ\n\t"
7639            "IDIV   $div\n\t"
7640    "done:"        %}
7641  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7642  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7643  ins_pipe( pipe_slow );
7644%}
7645
7646// Integer MOD with Register
7647instruct modI_eReg(eDXRegI rdx, eAXRegI rax, eCXRegI div, eFlagsReg cr) %{
7648  match(Set rdx (ModI rax div));
7649  effect(KILL rax, KILL cr);
7650
7651  size(26);
7652  ins_cost(300);
7653  format %{ "CDQ\n\t"
7654            "IDIV   $div" %}
7655  opcode(0xF7, 0x7);  /* Opcode F7 /7 */
7656  ins_encode( cdq_enc, OpcP, RegOpc(div) );
7657  ins_pipe( ialu_reg_reg_alu0 );
7658%}
7659
7660// Remainder Register Long
7661instruct modL_eReg( eADXRegL dst, eRegL src1, eRegL src2, eFlagsReg cr, eCXRegI cx, eBXRegI bx ) %{
7662  match(Set dst (ModL src1 src2));
7663  effect( KILL cr, KILL cx, KILL bx );
7664  ins_cost(10000);
7665  format %{ "PUSH   $src1.hi\n\t"
7666            "PUSH   $src1.lo\n\t"
7667            "PUSH   $src2.hi\n\t"
7668            "PUSH   $src2.lo\n\t"
7669            "CALL   SharedRuntime::lrem\n\t"
7670            "ADD    ESP,16" %}
7671  ins_encode( long_mod(src1,src2) );
7672  ins_pipe( pipe_slow );
7673%}
7674
7675// Divide Register Long (no special case since divisor != -1)
7676instruct divL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7677  match(Set dst (DivL dst imm));
7678  effect( TEMP tmp, TEMP tmp2, KILL cr );
7679  ins_cost(1000);
7680  format %{ "MOV    $tmp,abs($imm) # ldiv EDX:EAX,$imm\n\t"
7681            "XOR    $tmp2,$tmp2\n\t"
7682            "CMP    $tmp,EDX\n\t"
7683            "JA,s   fast\n\t"
7684            "MOV    $tmp2,EAX\n\t"
7685            "MOV    EAX,EDX\n\t"
7686            "MOV    EDX,0\n\t"
7687            "JLE,s  pos\n\t"
7688            "LNEG   EAX : $tmp2\n\t"
7689            "DIV    $tmp # unsigned division\n\t"
7690            "XCHG   EAX,$tmp2\n\t"
7691            "DIV    $tmp\n\t"
7692            "LNEG   $tmp2 : EAX\n\t"
7693            "JMP,s  done\n"
7694    "pos:\n\t"
7695            "DIV    $tmp\n\t"
7696            "XCHG   EAX,$tmp2\n"
7697    "fast:\n\t"
7698            "DIV    $tmp\n"
7699    "done:\n\t"
7700            "MOV    EDX,$tmp2\n\t"
7701            "NEG    EDX:EAX # if $imm < 0" %}
7702  ins_encode %{
7703    int con = (int)$imm$$constant;
7704    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7705    int pcon = (con > 0) ? con : -con;
7706    Label Lfast, Lpos, Ldone;
7707
7708    __ movl($tmp$$Register, pcon);
7709    __ xorl($tmp2$$Register,$tmp2$$Register);
7710    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7711    __ jccb(Assembler::above, Lfast); // result fits into 32 bit
7712
7713    __ movl($tmp2$$Register, $dst$$Register); // save
7714    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7715    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7716    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7717
7718    // Negative dividend.
7719    // convert value to positive to use unsigned division
7720    __ lneg($dst$$Register, $tmp2$$Register);
7721    __ divl($tmp$$Register);
7722    __ xchgl($dst$$Register, $tmp2$$Register);
7723    __ divl($tmp$$Register);
7724    // revert result back to negative
7725    __ lneg($tmp2$$Register, $dst$$Register);
7726    __ jmpb(Ldone);
7727
7728    __ bind(Lpos);
7729    __ divl($tmp$$Register); // Use unsigned division
7730    __ xchgl($dst$$Register, $tmp2$$Register);
7731    // Fallthrow for final divide, tmp2 has 32 bit hi result
7732
7733    __ bind(Lfast);
7734    // fast path: src is positive
7735    __ divl($tmp$$Register); // Use unsigned division
7736
7737    __ bind(Ldone);
7738    __ movl(HIGH_FROM_LOW($dst$$Register),$tmp2$$Register);
7739    if (con < 0) {
7740      __ lneg(HIGH_FROM_LOW($dst$$Register), $dst$$Register);
7741    }
7742  %}
7743  ins_pipe( pipe_slow );
7744%}
7745
7746// Remainder Register Long (remainder fit into 32 bits)
7747instruct modL_eReg_imm32( eADXRegL dst, immL32 imm, rRegI tmp, rRegI tmp2, eFlagsReg cr ) %{
7748  match(Set dst (ModL dst imm));
7749  effect( TEMP tmp, TEMP tmp2, KILL cr );
7750  ins_cost(1000);
7751  format %{ "MOV    $tmp,abs($imm) # lrem EDX:EAX,$imm\n\t"
7752            "CMP    $tmp,EDX\n\t"
7753            "JA,s   fast\n\t"
7754            "MOV    $tmp2,EAX\n\t"
7755            "MOV    EAX,EDX\n\t"
7756            "MOV    EDX,0\n\t"
7757            "JLE,s  pos\n\t"
7758            "LNEG   EAX : $tmp2\n\t"
7759            "DIV    $tmp # unsigned division\n\t"
7760            "MOV    EAX,$tmp2\n\t"
7761            "DIV    $tmp\n\t"
7762            "NEG    EDX\n\t"
7763            "JMP,s  done\n"
7764    "pos:\n\t"
7765            "DIV    $tmp\n\t"
7766            "MOV    EAX,$tmp2\n"
7767    "fast:\n\t"
7768            "DIV    $tmp\n"
7769    "done:\n\t"
7770            "MOV    EAX,EDX\n\t"
7771            "SAR    EDX,31\n\t" %}
7772  ins_encode %{
7773    int con = (int)$imm$$constant;
7774    assert(con != 0 && con != -1 && con != min_jint, "wrong divisor");
7775    int pcon = (con > 0) ? con : -con;
7776    Label  Lfast, Lpos, Ldone;
7777
7778    __ movl($tmp$$Register, pcon);
7779    __ cmpl($tmp$$Register, HIGH_FROM_LOW($dst$$Register));
7780    __ jccb(Assembler::above, Lfast); // src is positive and result fits into 32 bit
7781
7782    __ movl($tmp2$$Register, $dst$$Register); // save
7783    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7784    __ movl(HIGH_FROM_LOW($dst$$Register),0); // preserve flags
7785    __ jccb(Assembler::lessEqual, Lpos); // result is positive
7786
7787    // Negative dividend.
7788    // convert value to positive to use unsigned division
7789    __ lneg($dst$$Register, $tmp2$$Register);
7790    __ divl($tmp$$Register);
7791    __ movl($dst$$Register, $tmp2$$Register);
7792    __ divl($tmp$$Register);
7793    // revert remainder back to negative
7794    __ negl(HIGH_FROM_LOW($dst$$Register));
7795    __ jmpb(Ldone);
7796
7797    __ bind(Lpos);
7798    __ divl($tmp$$Register);
7799    __ movl($dst$$Register, $tmp2$$Register);
7800
7801    __ bind(Lfast);
7802    // fast path: src is positive
7803    __ divl($tmp$$Register);
7804
7805    __ bind(Ldone);
7806    __ movl($dst$$Register, HIGH_FROM_LOW($dst$$Register));
7807    __ sarl(HIGH_FROM_LOW($dst$$Register), 31); // result sign
7808
7809  %}
7810  ins_pipe( pipe_slow );
7811%}
7812
7813// Integer Shift Instructions
7814// Shift Left by one
7815instruct shlI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7816  match(Set dst (LShiftI dst shift));
7817  effect(KILL cr);
7818
7819  size(2);
7820  format %{ "SHL    $dst,$shift" %}
7821  opcode(0xD1, 0x4);  /* D1 /4 */
7822  ins_encode( OpcP, RegOpc( dst ) );
7823  ins_pipe( ialu_reg );
7824%}
7825
7826// Shift Left by 8-bit immediate
7827instruct salI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7828  match(Set dst (LShiftI dst shift));
7829  effect(KILL cr);
7830
7831  size(3);
7832  format %{ "SHL    $dst,$shift" %}
7833  opcode(0xC1, 0x4);  /* C1 /4 ib */
7834  ins_encode( RegOpcImm( dst, shift) );
7835  ins_pipe( ialu_reg );
7836%}
7837
7838// Shift Left by variable
7839instruct salI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7840  match(Set dst (LShiftI dst shift));
7841  effect(KILL cr);
7842
7843  size(2);
7844  format %{ "SHL    $dst,$shift" %}
7845  opcode(0xD3, 0x4);  /* D3 /4 */
7846  ins_encode( OpcP, RegOpc( dst ) );
7847  ins_pipe( ialu_reg_reg );
7848%}
7849
7850// Arithmetic shift right by one
7851instruct sarI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7852  match(Set dst (RShiftI dst shift));
7853  effect(KILL cr);
7854
7855  size(2);
7856  format %{ "SAR    $dst,$shift" %}
7857  opcode(0xD1, 0x7);  /* D1 /7 */
7858  ins_encode( OpcP, RegOpc( dst ) );
7859  ins_pipe( ialu_reg );
7860%}
7861
7862// Arithmetic shift right by one
7863instruct sarI_mem_1(memory dst, immI1 shift, eFlagsReg cr) %{
7864  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7865  effect(KILL cr);
7866  format %{ "SAR    $dst,$shift" %}
7867  opcode(0xD1, 0x7);  /* D1 /7 */
7868  ins_encode( OpcP, RMopc_Mem(secondary,dst) );
7869  ins_pipe( ialu_mem_imm );
7870%}
7871
7872// Arithmetic Shift Right by 8-bit immediate
7873instruct sarI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7874  match(Set dst (RShiftI dst shift));
7875  effect(KILL cr);
7876
7877  size(3);
7878  format %{ "SAR    $dst,$shift" %}
7879  opcode(0xC1, 0x7);  /* C1 /7 ib */
7880  ins_encode( RegOpcImm( dst, shift ) );
7881  ins_pipe( ialu_mem_imm );
7882%}
7883
7884// Arithmetic Shift Right by 8-bit immediate
7885instruct sarI_mem_imm(memory dst, immI8 shift, eFlagsReg cr) %{
7886  match(Set dst (StoreI dst (RShiftI (LoadI dst) shift)));
7887  effect(KILL cr);
7888
7889  format %{ "SAR    $dst,$shift" %}
7890  opcode(0xC1, 0x7);  /* C1 /7 ib */
7891  ins_encode( OpcP, RMopc_Mem(secondary, dst ), Con8or32( shift ) );
7892  ins_pipe( ialu_mem_imm );
7893%}
7894
7895// Arithmetic Shift Right by variable
7896instruct sarI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7897  match(Set dst (RShiftI dst shift));
7898  effect(KILL cr);
7899
7900  size(2);
7901  format %{ "SAR    $dst,$shift" %}
7902  opcode(0xD3, 0x7);  /* D3 /7 */
7903  ins_encode( OpcP, RegOpc( dst ) );
7904  ins_pipe( ialu_reg_reg );
7905%}
7906
7907// Logical shift right by one
7908instruct shrI_eReg_1(rRegI dst, immI1 shift, eFlagsReg cr) %{
7909  match(Set dst (URShiftI dst shift));
7910  effect(KILL cr);
7911
7912  size(2);
7913  format %{ "SHR    $dst,$shift" %}
7914  opcode(0xD1, 0x5);  /* D1 /5 */
7915  ins_encode( OpcP, RegOpc( dst ) );
7916  ins_pipe( ialu_reg );
7917%}
7918
7919// Logical Shift Right by 8-bit immediate
7920instruct shrI_eReg_imm(rRegI dst, immI8 shift, eFlagsReg cr) %{
7921  match(Set dst (URShiftI dst shift));
7922  effect(KILL cr);
7923
7924  size(3);
7925  format %{ "SHR    $dst,$shift" %}
7926  opcode(0xC1, 0x5);  /* C1 /5 ib */
7927  ins_encode( RegOpcImm( dst, shift) );
7928  ins_pipe( ialu_reg );
7929%}
7930
7931
7932// Logical Shift Right by 24, followed by Arithmetic Shift Left by 24.
7933// This idiom is used by the compiler for the i2b bytecode.
7934instruct i2b(rRegI dst, xRegI src, immI_24 twentyfour) %{
7935  match(Set dst (RShiftI (LShiftI src twentyfour) twentyfour));
7936
7937  size(3);
7938  format %{ "MOVSX  $dst,$src :8" %}
7939  ins_encode %{
7940    __ movsbl($dst$$Register, $src$$Register);
7941  %}
7942  ins_pipe(ialu_reg_reg);
7943%}
7944
7945// Logical Shift Right by 16, followed by Arithmetic Shift Left by 16.
7946// This idiom is used by the compiler the i2s bytecode.
7947instruct i2s(rRegI dst, xRegI src, immI_16 sixteen) %{
7948  match(Set dst (RShiftI (LShiftI src sixteen) sixteen));
7949
7950  size(3);
7951  format %{ "MOVSX  $dst,$src :16" %}
7952  ins_encode %{
7953    __ movswl($dst$$Register, $src$$Register);
7954  %}
7955  ins_pipe(ialu_reg_reg);
7956%}
7957
7958
7959// Logical Shift Right by variable
7960instruct shrI_eReg_CL(rRegI dst, eCXRegI shift, eFlagsReg cr) %{
7961  match(Set dst (URShiftI dst shift));
7962  effect(KILL cr);
7963
7964  size(2);
7965  format %{ "SHR    $dst,$shift" %}
7966  opcode(0xD3, 0x5);  /* D3 /5 */
7967  ins_encode( OpcP, RegOpc( dst ) );
7968  ins_pipe( ialu_reg_reg );
7969%}
7970
7971
7972//----------Logical Instructions-----------------------------------------------
7973//----------Integer Logical Instructions---------------------------------------
7974// And Instructions
7975// And Register with Register
7976instruct andI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
7977  match(Set dst (AndI dst src));
7978  effect(KILL cr);
7979
7980  size(2);
7981  format %{ "AND    $dst,$src" %}
7982  opcode(0x23);
7983  ins_encode( OpcP, RegReg( dst, src) );
7984  ins_pipe( ialu_reg_reg );
7985%}
7986
7987// And Register with Immediate
7988instruct andI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
7989  match(Set dst (AndI dst src));
7990  effect(KILL cr);
7991
7992  format %{ "AND    $dst,$src" %}
7993  opcode(0x81,0x04);  /* Opcode 81 /4 */
7994  // ins_encode( RegImm( dst, src) );
7995  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
7996  ins_pipe( ialu_reg );
7997%}
7998
7999// And Register with Memory
8000instruct andI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8001  match(Set dst (AndI dst (LoadI src)));
8002  effect(KILL cr);
8003
8004  ins_cost(125);
8005  format %{ "AND    $dst,$src" %}
8006  opcode(0x23);
8007  ins_encode( OpcP, RegMem( dst, src) );
8008  ins_pipe( ialu_reg_mem );
8009%}
8010
8011// And Memory with Register
8012instruct andI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8013  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8014  effect(KILL cr);
8015
8016  ins_cost(150);
8017  format %{ "AND    $dst,$src" %}
8018  opcode(0x21);  /* Opcode 21 /r */
8019  ins_encode( OpcP, RegMem( src, dst ) );
8020  ins_pipe( ialu_mem_reg );
8021%}
8022
8023// And Memory with Immediate
8024instruct andI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8025  match(Set dst (StoreI dst (AndI (LoadI dst) src)));
8026  effect(KILL cr);
8027
8028  ins_cost(125);
8029  format %{ "AND    $dst,$src" %}
8030  opcode(0x81, 0x4);  /* Opcode 81 /4 id */
8031  // ins_encode( MemImm( dst, src) );
8032  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8033  ins_pipe( ialu_mem_imm );
8034%}
8035
8036// BMI1 instructions
8037instruct andnI_rReg_rReg_rReg(rRegI dst, rRegI src1, rRegI src2, immI_M1 minus_1, eFlagsReg cr) %{
8038  match(Set dst (AndI (XorI src1 minus_1) src2));
8039  predicate(UseBMI1Instructions);
8040  effect(KILL cr);
8041
8042  format %{ "ANDNL  $dst, $src1, $src2" %}
8043
8044  ins_encode %{
8045    __ andnl($dst$$Register, $src1$$Register, $src2$$Register);
8046  %}
8047  ins_pipe(ialu_reg);
8048%}
8049
8050instruct andnI_rReg_rReg_mem(rRegI dst, rRegI src1, memory src2, immI_M1 minus_1, eFlagsReg cr) %{
8051  match(Set dst (AndI (XorI src1 minus_1) (LoadI src2) ));
8052  predicate(UseBMI1Instructions);
8053  effect(KILL cr);
8054
8055  ins_cost(125);
8056  format %{ "ANDNL  $dst, $src1, $src2" %}
8057
8058  ins_encode %{
8059    __ andnl($dst$$Register, $src1$$Register, $src2$$Address);
8060  %}
8061  ins_pipe(ialu_reg_mem);
8062%}
8063
8064instruct blsiI_rReg_rReg(rRegI dst, rRegI src, immI0 imm_zero, eFlagsReg cr) %{
8065  match(Set dst (AndI (SubI imm_zero src) src));
8066  predicate(UseBMI1Instructions);
8067  effect(KILL cr);
8068
8069  format %{ "BLSIL  $dst, $src" %}
8070
8071  ins_encode %{
8072    __ blsil($dst$$Register, $src$$Register);
8073  %}
8074  ins_pipe(ialu_reg);
8075%}
8076
8077instruct blsiI_rReg_mem(rRegI dst, memory src, immI0 imm_zero, eFlagsReg cr) %{
8078  match(Set dst (AndI (SubI imm_zero (LoadI src) ) (LoadI src) ));
8079  predicate(UseBMI1Instructions);
8080  effect(KILL cr);
8081
8082  ins_cost(125);
8083  format %{ "BLSIL  $dst, $src" %}
8084
8085  ins_encode %{
8086    __ blsil($dst$$Register, $src$$Address);
8087  %}
8088  ins_pipe(ialu_reg_mem);
8089%}
8090
8091instruct blsmskI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8092%{
8093  match(Set dst (XorI (AddI src minus_1) src));
8094  predicate(UseBMI1Instructions);
8095  effect(KILL cr);
8096
8097  format %{ "BLSMSKL $dst, $src" %}
8098
8099  ins_encode %{
8100    __ blsmskl($dst$$Register, $src$$Register);
8101  %}
8102
8103  ins_pipe(ialu_reg);
8104%}
8105
8106instruct blsmskI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8107%{
8108  match(Set dst (XorI (AddI (LoadI src) minus_1) (LoadI src) ));
8109  predicate(UseBMI1Instructions);
8110  effect(KILL cr);
8111
8112  ins_cost(125);
8113  format %{ "BLSMSKL $dst, $src" %}
8114
8115  ins_encode %{
8116    __ blsmskl($dst$$Register, $src$$Address);
8117  %}
8118
8119  ins_pipe(ialu_reg_mem);
8120%}
8121
8122instruct blsrI_rReg_rReg(rRegI dst, rRegI src, immI_M1 minus_1, eFlagsReg cr)
8123%{
8124  match(Set dst (AndI (AddI src minus_1) src) );
8125  predicate(UseBMI1Instructions);
8126  effect(KILL cr);
8127
8128  format %{ "BLSRL  $dst, $src" %}
8129
8130  ins_encode %{
8131    __ blsrl($dst$$Register, $src$$Register);
8132  %}
8133
8134  ins_pipe(ialu_reg);
8135%}
8136
8137instruct blsrI_rReg_mem(rRegI dst, memory src, immI_M1 minus_1, eFlagsReg cr)
8138%{
8139  match(Set dst (AndI (AddI (LoadI src) minus_1) (LoadI src) ));
8140  predicate(UseBMI1Instructions);
8141  effect(KILL cr);
8142
8143  ins_cost(125);
8144  format %{ "BLSRL  $dst, $src" %}
8145
8146  ins_encode %{
8147    __ blsrl($dst$$Register, $src$$Address);
8148  %}
8149
8150  ins_pipe(ialu_reg_mem);
8151%}
8152
8153// Or Instructions
8154// Or Register with Register
8155instruct orI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8156  match(Set dst (OrI dst src));
8157  effect(KILL cr);
8158
8159  size(2);
8160  format %{ "OR     $dst,$src" %}
8161  opcode(0x0B);
8162  ins_encode( OpcP, RegReg( dst, src) );
8163  ins_pipe( ialu_reg_reg );
8164%}
8165
8166instruct orI_eReg_castP2X(rRegI dst, eRegP src, eFlagsReg cr) %{
8167  match(Set dst (OrI dst (CastP2X src)));
8168  effect(KILL cr);
8169
8170  size(2);
8171  format %{ "OR     $dst,$src" %}
8172  opcode(0x0B);
8173  ins_encode( OpcP, RegReg( dst, src) );
8174  ins_pipe( ialu_reg_reg );
8175%}
8176
8177
8178// Or Register with Immediate
8179instruct orI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8180  match(Set dst (OrI dst src));
8181  effect(KILL cr);
8182
8183  format %{ "OR     $dst,$src" %}
8184  opcode(0x81,0x01);  /* Opcode 81 /1 id */
8185  // ins_encode( RegImm( dst, src) );
8186  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8187  ins_pipe( ialu_reg );
8188%}
8189
8190// Or Register with Memory
8191instruct orI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8192  match(Set dst (OrI dst (LoadI src)));
8193  effect(KILL cr);
8194
8195  ins_cost(125);
8196  format %{ "OR     $dst,$src" %}
8197  opcode(0x0B);
8198  ins_encode( OpcP, RegMem( dst, src) );
8199  ins_pipe( ialu_reg_mem );
8200%}
8201
8202// Or Memory with Register
8203instruct orI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8204  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8205  effect(KILL cr);
8206
8207  ins_cost(150);
8208  format %{ "OR     $dst,$src" %}
8209  opcode(0x09);  /* Opcode 09 /r */
8210  ins_encode( OpcP, RegMem( src, dst ) );
8211  ins_pipe( ialu_mem_reg );
8212%}
8213
8214// Or Memory with Immediate
8215instruct orI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8216  match(Set dst (StoreI dst (OrI (LoadI dst) src)));
8217  effect(KILL cr);
8218
8219  ins_cost(125);
8220  format %{ "OR     $dst,$src" %}
8221  opcode(0x81,0x1);  /* Opcode 81 /1 id */
8222  // ins_encode( MemImm( dst, src) );
8223  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8224  ins_pipe( ialu_mem_imm );
8225%}
8226
8227// ROL/ROR
8228// ROL expand
8229instruct rolI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8230  effect(USE_DEF dst, USE shift, KILL cr);
8231
8232  format %{ "ROL    $dst, $shift" %}
8233  opcode(0xD1, 0x0); /* Opcode D1 /0 */
8234  ins_encode( OpcP, RegOpc( dst ));
8235  ins_pipe( ialu_reg );
8236%}
8237
8238instruct rolI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8239  effect(USE_DEF dst, USE shift, KILL cr);
8240
8241  format %{ "ROL    $dst, $shift" %}
8242  opcode(0xC1, 0x0); /*Opcode /C1  /0  */
8243  ins_encode( RegOpcImm(dst, shift) );
8244  ins_pipe(ialu_reg);
8245%}
8246
8247instruct rolI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr) %{
8248  effect(USE_DEF dst, USE shift, KILL cr);
8249
8250  format %{ "ROL    $dst, $shift" %}
8251  opcode(0xD3, 0x0);    /* Opcode D3 /0 */
8252  ins_encode(OpcP, RegOpc(dst));
8253  ins_pipe( ialu_reg_reg );
8254%}
8255// end of ROL expand
8256
8257// ROL 32bit by one once
8258instruct rolI_eReg_i1(rRegI dst, immI1 lshift, immI_M1 rshift, eFlagsReg cr) %{
8259  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8260
8261  expand %{
8262    rolI_eReg_imm1(dst, lshift, cr);
8263  %}
8264%}
8265
8266// ROL 32bit var by imm8 once
8267instruct rolI_eReg_i8(rRegI dst, immI8 lshift, immI8 rshift, eFlagsReg cr) %{
8268  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8269  match(Set dst ( OrI (LShiftI dst lshift) (URShiftI dst rshift)));
8270
8271  expand %{
8272    rolI_eReg_imm8(dst, lshift, cr);
8273  %}
8274%}
8275
8276// ROL 32bit var by var once
8277instruct rolI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8278  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI zero shift))));
8279
8280  expand %{
8281    rolI_eReg_CL(dst, shift, cr);
8282  %}
8283%}
8284
8285// ROL 32bit var by var once
8286instruct rolI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8287  match(Set dst ( OrI (LShiftI dst shift) (URShiftI dst (SubI c32 shift))));
8288
8289  expand %{
8290    rolI_eReg_CL(dst, shift, cr);
8291  %}
8292%}
8293
8294// ROR expand
8295instruct rorI_eReg_imm1(rRegI dst, immI1 shift, eFlagsReg cr) %{
8296  effect(USE_DEF dst, USE shift, KILL cr);
8297
8298  format %{ "ROR    $dst, $shift" %}
8299  opcode(0xD1,0x1);  /* Opcode D1 /1 */
8300  ins_encode( OpcP, RegOpc( dst ) );
8301  ins_pipe( ialu_reg );
8302%}
8303
8304instruct rorI_eReg_imm8(rRegI dst, immI8 shift, eFlagsReg cr) %{
8305  effect (USE_DEF dst, USE shift, KILL cr);
8306
8307  format %{ "ROR    $dst, $shift" %}
8308  opcode(0xC1, 0x1); /* Opcode /C1 /1 ib */
8309  ins_encode( RegOpcImm(dst, shift) );
8310  ins_pipe( ialu_reg );
8311%}
8312
8313instruct rorI_eReg_CL(ncxRegI dst, eCXRegI shift, eFlagsReg cr)%{
8314  effect(USE_DEF dst, USE shift, KILL cr);
8315
8316  format %{ "ROR    $dst, $shift" %}
8317  opcode(0xD3, 0x1);    /* Opcode D3 /1 */
8318  ins_encode(OpcP, RegOpc(dst));
8319  ins_pipe( ialu_reg_reg );
8320%}
8321// end of ROR expand
8322
8323// ROR right once
8324instruct rorI_eReg_i1(rRegI dst, immI1 rshift, immI_M1 lshift, eFlagsReg cr) %{
8325  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8326
8327  expand %{
8328    rorI_eReg_imm1(dst, rshift, cr);
8329  %}
8330%}
8331
8332// ROR 32bit by immI8 once
8333instruct rorI_eReg_i8(rRegI dst, immI8 rshift, immI8 lshift, eFlagsReg cr) %{
8334  predicate(  0 == ((n->in(1)->in(2)->get_int() + n->in(2)->in(2)->get_int()) & 0x1f));
8335  match(Set dst ( OrI (URShiftI dst rshift) (LShiftI dst lshift)));
8336
8337  expand %{
8338    rorI_eReg_imm8(dst, rshift, cr);
8339  %}
8340%}
8341
8342// ROR 32bit var by var once
8343instruct rorI_eReg_Var_C0(ncxRegI dst, eCXRegI shift, immI0 zero, eFlagsReg cr) %{
8344  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI zero shift))));
8345
8346  expand %{
8347    rorI_eReg_CL(dst, shift, cr);
8348  %}
8349%}
8350
8351// ROR 32bit var by var once
8352instruct rorI_eReg_Var_C32(ncxRegI dst, eCXRegI shift, immI_32 c32, eFlagsReg cr) %{
8353  match(Set dst ( OrI (URShiftI dst shift) (LShiftI dst (SubI c32 shift))));
8354
8355  expand %{
8356    rorI_eReg_CL(dst, shift, cr);
8357  %}
8358%}
8359
8360// Xor Instructions
8361// Xor Register with Register
8362instruct xorI_eReg(rRegI dst, rRegI src, eFlagsReg cr) %{
8363  match(Set dst (XorI dst src));
8364  effect(KILL cr);
8365
8366  size(2);
8367  format %{ "XOR    $dst,$src" %}
8368  opcode(0x33);
8369  ins_encode( OpcP, RegReg( dst, src) );
8370  ins_pipe( ialu_reg_reg );
8371%}
8372
8373// Xor Register with Immediate -1
8374instruct xorI_eReg_im1(rRegI dst, immI_M1 imm) %{
8375  match(Set dst (XorI dst imm));
8376
8377  size(2);
8378  format %{ "NOT    $dst" %}
8379  ins_encode %{
8380     __ notl($dst$$Register);
8381  %}
8382  ins_pipe( ialu_reg );
8383%}
8384
8385// Xor Register with Immediate
8386instruct xorI_eReg_imm(rRegI dst, immI src, eFlagsReg cr) %{
8387  match(Set dst (XorI dst src));
8388  effect(KILL cr);
8389
8390  format %{ "XOR    $dst,$src" %}
8391  opcode(0x81,0x06);  /* Opcode 81 /6 id */
8392  // ins_encode( RegImm( dst, src) );
8393  ins_encode( OpcSErm( dst, src ), Con8or32( src ) );
8394  ins_pipe( ialu_reg );
8395%}
8396
8397// Xor Register with Memory
8398instruct xorI_eReg_mem(rRegI dst, memory src, eFlagsReg cr) %{
8399  match(Set dst (XorI dst (LoadI src)));
8400  effect(KILL cr);
8401
8402  ins_cost(125);
8403  format %{ "XOR    $dst,$src" %}
8404  opcode(0x33);
8405  ins_encode( OpcP, RegMem(dst, src) );
8406  ins_pipe( ialu_reg_mem );
8407%}
8408
8409// Xor Memory with Register
8410instruct xorI_mem_eReg(memory dst, rRegI src, eFlagsReg cr) %{
8411  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8412  effect(KILL cr);
8413
8414  ins_cost(150);
8415  format %{ "XOR    $dst,$src" %}
8416  opcode(0x31);  /* Opcode 31 /r */
8417  ins_encode( OpcP, RegMem( src, dst ) );
8418  ins_pipe( ialu_mem_reg );
8419%}
8420
8421// Xor Memory with Immediate
8422instruct xorI_mem_imm(memory dst, immI src, eFlagsReg cr) %{
8423  match(Set dst (StoreI dst (XorI (LoadI dst) src)));
8424  effect(KILL cr);
8425
8426  ins_cost(125);
8427  format %{ "XOR    $dst,$src" %}
8428  opcode(0x81,0x6);  /* Opcode 81 /6 id */
8429  ins_encode( OpcSE( src ), RMopc_Mem(secondary, dst ), Con8or32( src ) );
8430  ins_pipe( ialu_mem_imm );
8431%}
8432
8433//----------Convert Int to Boolean---------------------------------------------
8434
8435instruct movI_nocopy(rRegI dst, rRegI src) %{
8436  effect( DEF dst, USE src );
8437  format %{ "MOV    $dst,$src" %}
8438  ins_encode( enc_Copy( dst, src) );
8439  ins_pipe( ialu_reg_reg );
8440%}
8441
8442instruct ci2b( rRegI dst, rRegI src, eFlagsReg cr ) %{
8443  effect( USE_DEF dst, USE src, KILL cr );
8444
8445  size(4);
8446  format %{ "NEG    $dst\n\t"
8447            "ADC    $dst,$src" %}
8448  ins_encode( neg_reg(dst),
8449              OpcRegReg(0x13,dst,src) );
8450  ins_pipe( ialu_reg_reg_long );
8451%}
8452
8453instruct convI2B( rRegI dst, rRegI src, eFlagsReg cr ) %{
8454  match(Set dst (Conv2B src));
8455
8456  expand %{
8457    movI_nocopy(dst,src);
8458    ci2b(dst,src,cr);
8459  %}
8460%}
8461
8462instruct movP_nocopy(rRegI dst, eRegP src) %{
8463  effect( DEF dst, USE src );
8464  format %{ "MOV    $dst,$src" %}
8465  ins_encode( enc_Copy( dst, src) );
8466  ins_pipe( ialu_reg_reg );
8467%}
8468
8469instruct cp2b( rRegI dst, eRegP src, eFlagsReg cr ) %{
8470  effect( USE_DEF dst, USE src, KILL cr );
8471  format %{ "NEG    $dst\n\t"
8472            "ADC    $dst,$src" %}
8473  ins_encode( neg_reg(dst),
8474              OpcRegReg(0x13,dst,src) );
8475  ins_pipe( ialu_reg_reg_long );
8476%}
8477
8478instruct convP2B( rRegI dst, eRegP src, eFlagsReg cr ) %{
8479  match(Set dst (Conv2B src));
8480
8481  expand %{
8482    movP_nocopy(dst,src);
8483    cp2b(dst,src,cr);
8484  %}
8485%}
8486
8487instruct cmpLTMask(eCXRegI dst, ncxRegI p, ncxRegI q, eFlagsReg cr) %{
8488  match(Set dst (CmpLTMask p q));
8489  effect(KILL cr);
8490  ins_cost(400);
8491
8492  // SETlt can only use low byte of EAX,EBX, ECX, or EDX as destination
8493  format %{ "XOR    $dst,$dst\n\t"
8494            "CMP    $p,$q\n\t"
8495            "SETlt  $dst\n\t"
8496            "NEG    $dst" %}
8497  ins_encode %{
8498    Register Rp = $p$$Register;
8499    Register Rq = $q$$Register;
8500    Register Rd = $dst$$Register;
8501    Label done;
8502    __ xorl(Rd, Rd);
8503    __ cmpl(Rp, Rq);
8504    __ setb(Assembler::less, Rd);
8505    __ negl(Rd);
8506  %}
8507
8508  ins_pipe(pipe_slow);
8509%}
8510
8511instruct cmpLTMask0(rRegI dst, immI0 zero, eFlagsReg cr) %{
8512  match(Set dst (CmpLTMask dst zero));
8513  effect(DEF dst, KILL cr);
8514  ins_cost(100);
8515
8516  format %{ "SAR    $dst,31\t# cmpLTMask0" %}
8517  ins_encode %{
8518  __ sarl($dst$$Register, 31);
8519  %}
8520  ins_pipe(ialu_reg);
8521%}
8522
8523/* better to save a register than avoid a branch */
8524instruct cadd_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8525  match(Set p (AddI (AndI (CmpLTMask p q) y) (SubI p q)));
8526  effect(KILL cr);
8527  ins_cost(400);
8528  format %{ "SUB    $p,$q\t# cadd_cmpLTMask\n\t"
8529            "JGE    done\n\t"
8530            "ADD    $p,$y\n"
8531            "done:  " %}
8532  ins_encode %{
8533    Register Rp = $p$$Register;
8534    Register Rq = $q$$Register;
8535    Register Ry = $y$$Register;
8536    Label done;
8537    __ subl(Rp, Rq);
8538    __ jccb(Assembler::greaterEqual, done);
8539    __ addl(Rp, Ry);
8540    __ bind(done);
8541  %}
8542
8543  ins_pipe(pipe_cmplt);
8544%}
8545
8546/* better to save a register than avoid a branch */
8547instruct and_cmpLTMask(rRegI p, rRegI q, rRegI y, eFlagsReg cr) %{
8548  match(Set y (AndI (CmpLTMask p q) y));
8549  effect(KILL cr);
8550
8551  ins_cost(300);
8552
8553  format %{ "CMPL     $p, $q\t# and_cmpLTMask\n\t"
8554            "JLT      done\n\t"
8555            "XORL     $y, $y\n"
8556            "done:  " %}
8557  ins_encode %{
8558    Register Rp = $p$$Register;
8559    Register Rq = $q$$Register;
8560    Register Ry = $y$$Register;
8561    Label done;
8562    __ cmpl(Rp, Rq);
8563    __ jccb(Assembler::less, done);
8564    __ xorl(Ry, Ry);
8565    __ bind(done);
8566  %}
8567
8568  ins_pipe(pipe_cmplt);
8569%}
8570
8571/* If I enable this, I encourage spilling in the inner loop of compress.
8572instruct cadd_cmpLTMask_mem(ncxRegI p, ncxRegI q, memory y, eCXRegI tmp, eFlagsReg cr) %{
8573  match(Set p (AddI (AndI (CmpLTMask p q) (LoadI y)) (SubI p q)));
8574*/
8575//----------Overflow Math Instructions-----------------------------------------
8576
8577instruct overflowAddI_eReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8578%{
8579  match(Set cr (OverflowAddI op1 op2));
8580  effect(DEF cr, USE_KILL op1, USE op2);
8581
8582  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8583
8584  ins_encode %{
8585    __ addl($op1$$Register, $op2$$Register);
8586  %}
8587  ins_pipe(ialu_reg_reg);
8588%}
8589
8590instruct overflowAddI_rReg_imm(eFlagsReg cr, eAXRegI op1, immI op2)
8591%{
8592  match(Set cr (OverflowAddI op1 op2));
8593  effect(DEF cr, USE_KILL op1, USE op2);
8594
8595  format %{ "ADD    $op1, $op2\t# overflow check int" %}
8596
8597  ins_encode %{
8598    __ addl($op1$$Register, $op2$$constant);
8599  %}
8600  ins_pipe(ialu_reg_reg);
8601%}
8602
8603instruct overflowSubI_rReg(eFlagsReg cr, rRegI op1, rRegI op2)
8604%{
8605  match(Set cr (OverflowSubI op1 op2));
8606
8607  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8608  ins_encode %{
8609    __ cmpl($op1$$Register, $op2$$Register);
8610  %}
8611  ins_pipe(ialu_reg_reg);
8612%}
8613
8614instruct overflowSubI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2)
8615%{
8616  match(Set cr (OverflowSubI op1 op2));
8617
8618  format %{ "CMP    $op1, $op2\t# overflow check int" %}
8619  ins_encode %{
8620    __ cmpl($op1$$Register, $op2$$constant);
8621  %}
8622  ins_pipe(ialu_reg_reg);
8623%}
8624
8625instruct overflowNegI_rReg(eFlagsReg cr, immI0 zero, eAXRegI op2)
8626%{
8627  match(Set cr (OverflowSubI zero op2));
8628  effect(DEF cr, USE_KILL op2);
8629
8630  format %{ "NEG    $op2\t# overflow check int" %}
8631  ins_encode %{
8632    __ negl($op2$$Register);
8633  %}
8634  ins_pipe(ialu_reg_reg);
8635%}
8636
8637instruct overflowMulI_rReg(eFlagsReg cr, eAXRegI op1, rRegI op2)
8638%{
8639  match(Set cr (OverflowMulI op1 op2));
8640  effect(DEF cr, USE_KILL op1, USE op2);
8641
8642  format %{ "IMUL    $op1, $op2\t# overflow check int" %}
8643  ins_encode %{
8644    __ imull($op1$$Register, $op2$$Register);
8645  %}
8646  ins_pipe(ialu_reg_reg_alu0);
8647%}
8648
8649instruct overflowMulI_rReg_imm(eFlagsReg cr, rRegI op1, immI op2, rRegI tmp)
8650%{
8651  match(Set cr (OverflowMulI op1 op2));
8652  effect(DEF cr, TEMP tmp, USE op1, USE op2);
8653
8654  format %{ "IMUL    $tmp, $op1, $op2\t# overflow check int" %}
8655  ins_encode %{
8656    __ imull($tmp$$Register, $op1$$Register, $op2$$constant);
8657  %}
8658  ins_pipe(ialu_reg_reg_alu0);
8659%}
8660
8661//----------Long Instructions------------------------------------------------
8662// Add Long Register with Register
8663instruct addL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8664  match(Set dst (AddL dst src));
8665  effect(KILL cr);
8666  ins_cost(200);
8667  format %{ "ADD    $dst.lo,$src.lo\n\t"
8668            "ADC    $dst.hi,$src.hi" %}
8669  opcode(0x03, 0x13);
8670  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8671  ins_pipe( ialu_reg_reg_long );
8672%}
8673
8674// Add Long Register with Immediate
8675instruct addL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8676  match(Set dst (AddL dst src));
8677  effect(KILL cr);
8678  format %{ "ADD    $dst.lo,$src.lo\n\t"
8679            "ADC    $dst.hi,$src.hi" %}
8680  opcode(0x81,0x00,0x02);  /* Opcode 81 /0, 81 /2 */
8681  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8682  ins_pipe( ialu_reg_long );
8683%}
8684
8685// Add Long Register with Memory
8686instruct addL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8687  match(Set dst (AddL dst (LoadL mem)));
8688  effect(KILL cr);
8689  ins_cost(125);
8690  format %{ "ADD    $dst.lo,$mem\n\t"
8691            "ADC    $dst.hi,$mem+4" %}
8692  opcode(0x03, 0x13);
8693  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8694  ins_pipe( ialu_reg_long_mem );
8695%}
8696
8697// Subtract Long Register with Register.
8698instruct subL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8699  match(Set dst (SubL dst src));
8700  effect(KILL cr);
8701  ins_cost(200);
8702  format %{ "SUB    $dst.lo,$src.lo\n\t"
8703            "SBB    $dst.hi,$src.hi" %}
8704  opcode(0x2B, 0x1B);
8705  ins_encode( RegReg_Lo(dst, src), RegReg_Hi(dst,src) );
8706  ins_pipe( ialu_reg_reg_long );
8707%}
8708
8709// Subtract Long Register with Immediate
8710instruct subL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8711  match(Set dst (SubL dst src));
8712  effect(KILL cr);
8713  format %{ "SUB    $dst.lo,$src.lo\n\t"
8714            "SBB    $dst.hi,$src.hi" %}
8715  opcode(0x81,0x05,0x03);  /* Opcode 81 /5, 81 /3 */
8716  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8717  ins_pipe( ialu_reg_long );
8718%}
8719
8720// Subtract Long Register with Memory
8721instruct subL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8722  match(Set dst (SubL dst (LoadL mem)));
8723  effect(KILL cr);
8724  ins_cost(125);
8725  format %{ "SUB    $dst.lo,$mem\n\t"
8726            "SBB    $dst.hi,$mem+4" %}
8727  opcode(0x2B, 0x1B);
8728  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8729  ins_pipe( ialu_reg_long_mem );
8730%}
8731
8732instruct negL_eReg(eRegL dst, immL0 zero, eFlagsReg cr) %{
8733  match(Set dst (SubL zero dst));
8734  effect(KILL cr);
8735  ins_cost(300);
8736  format %{ "NEG    $dst.hi\n\tNEG    $dst.lo\n\tSBB    $dst.hi,0" %}
8737  ins_encode( neg_long(dst) );
8738  ins_pipe( ialu_reg_reg_long );
8739%}
8740
8741// And Long Register with Register
8742instruct andL_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8743  match(Set dst (AndL dst src));
8744  effect(KILL cr);
8745  format %{ "AND    $dst.lo,$src.lo\n\t"
8746            "AND    $dst.hi,$src.hi" %}
8747  opcode(0x23,0x23);
8748  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8749  ins_pipe( ialu_reg_reg_long );
8750%}
8751
8752// And Long Register with Immediate
8753instruct andL_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8754  match(Set dst (AndL dst src));
8755  effect(KILL cr);
8756  format %{ "AND    $dst.lo,$src.lo\n\t"
8757            "AND    $dst.hi,$src.hi" %}
8758  opcode(0x81,0x04,0x04);  /* Opcode 81 /4, 81 /4 */
8759  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8760  ins_pipe( ialu_reg_long );
8761%}
8762
8763// And Long Register with Memory
8764instruct andL_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
8765  match(Set dst (AndL dst (LoadL mem)));
8766  effect(KILL cr);
8767  ins_cost(125);
8768  format %{ "AND    $dst.lo,$mem\n\t"
8769            "AND    $dst.hi,$mem+4" %}
8770  opcode(0x23, 0x23);
8771  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
8772  ins_pipe( ialu_reg_long_mem );
8773%}
8774
8775// BMI1 instructions
8776instruct andnL_eReg_eReg_eReg(eRegL dst, eRegL src1, eRegL src2, immL_M1 minus_1, eFlagsReg cr) %{
8777  match(Set dst (AndL (XorL src1 minus_1) src2));
8778  predicate(UseBMI1Instructions);
8779  effect(KILL cr, TEMP dst);
8780
8781  format %{ "ANDNL  $dst.lo, $src1.lo, $src2.lo\n\t"
8782            "ANDNL  $dst.hi, $src1.hi, $src2.hi"
8783         %}
8784
8785  ins_encode %{
8786    Register Rdst = $dst$$Register;
8787    Register Rsrc1 = $src1$$Register;
8788    Register Rsrc2 = $src2$$Register;
8789    __ andnl(Rdst, Rsrc1, Rsrc2);
8790    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), HIGH_FROM_LOW(Rsrc2));
8791  %}
8792  ins_pipe(ialu_reg_reg_long);
8793%}
8794
8795instruct andnL_eReg_eReg_mem(eRegL dst, eRegL src1, memory src2, immL_M1 minus_1, eFlagsReg cr) %{
8796  match(Set dst (AndL (XorL src1 minus_1) (LoadL src2) ));
8797  predicate(UseBMI1Instructions);
8798  effect(KILL cr, TEMP dst);
8799
8800  ins_cost(125);
8801  format %{ "ANDNL  $dst.lo, $src1.lo, $src2\n\t"
8802            "ANDNL  $dst.hi, $src1.hi, $src2+4"
8803         %}
8804
8805  ins_encode %{
8806    Register Rdst = $dst$$Register;
8807    Register Rsrc1 = $src1$$Register;
8808    Address src2_hi = Address::make_raw($src2$$base, $src2$$index, $src2$$scale, $src2$$disp + 4, relocInfo::none);
8809
8810    __ andnl(Rdst, Rsrc1, $src2$$Address);
8811    __ andnl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc1), src2_hi);
8812  %}
8813  ins_pipe(ialu_reg_mem);
8814%}
8815
8816instruct blsiL_eReg_eReg(eRegL dst, eRegL src, immL0 imm_zero, eFlagsReg cr) %{
8817  match(Set dst (AndL (SubL imm_zero src) src));
8818  predicate(UseBMI1Instructions);
8819  effect(KILL cr, TEMP dst);
8820
8821  format %{ "MOVL   $dst.hi, 0\n\t"
8822            "BLSIL  $dst.lo, $src.lo\n\t"
8823            "JNZ    done\n\t"
8824            "BLSIL  $dst.hi, $src.hi\n"
8825            "done:"
8826         %}
8827
8828  ins_encode %{
8829    Label done;
8830    Register Rdst = $dst$$Register;
8831    Register Rsrc = $src$$Register;
8832    __ movl(HIGH_FROM_LOW(Rdst), 0);
8833    __ blsil(Rdst, Rsrc);
8834    __ jccb(Assembler::notZero, done);
8835    __ blsil(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8836    __ bind(done);
8837  %}
8838  ins_pipe(ialu_reg);
8839%}
8840
8841instruct blsiL_eReg_mem(eRegL dst, memory src, immL0 imm_zero, eFlagsReg cr) %{
8842  match(Set dst (AndL (SubL imm_zero (LoadL src) ) (LoadL src) ));
8843  predicate(UseBMI1Instructions);
8844  effect(KILL cr, TEMP dst);
8845
8846  ins_cost(125);
8847  format %{ "MOVL   $dst.hi, 0\n\t"
8848            "BLSIL  $dst.lo, $src\n\t"
8849            "JNZ    done\n\t"
8850            "BLSIL  $dst.hi, $src+4\n"
8851            "done:"
8852         %}
8853
8854  ins_encode %{
8855    Label done;
8856    Register Rdst = $dst$$Register;
8857    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8858
8859    __ movl(HIGH_FROM_LOW(Rdst), 0);
8860    __ blsil(Rdst, $src$$Address);
8861    __ jccb(Assembler::notZero, done);
8862    __ blsil(HIGH_FROM_LOW(Rdst), src_hi);
8863    __ bind(done);
8864  %}
8865  ins_pipe(ialu_reg_mem);
8866%}
8867
8868instruct blsmskL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8869%{
8870  match(Set dst (XorL (AddL src minus_1) src));
8871  predicate(UseBMI1Instructions);
8872  effect(KILL cr, TEMP dst);
8873
8874  format %{ "MOVL    $dst.hi, 0\n\t"
8875            "BLSMSKL $dst.lo, $src.lo\n\t"
8876            "JNC     done\n\t"
8877            "BLSMSKL $dst.hi, $src.hi\n"
8878            "done:"
8879         %}
8880
8881  ins_encode %{
8882    Label done;
8883    Register Rdst = $dst$$Register;
8884    Register Rsrc = $src$$Register;
8885    __ movl(HIGH_FROM_LOW(Rdst), 0);
8886    __ blsmskl(Rdst, Rsrc);
8887    __ jccb(Assembler::carryClear, done);
8888    __ blsmskl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8889    __ bind(done);
8890  %}
8891
8892  ins_pipe(ialu_reg);
8893%}
8894
8895instruct blsmskL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8896%{
8897  match(Set dst (XorL (AddL (LoadL src) minus_1) (LoadL src) ));
8898  predicate(UseBMI1Instructions);
8899  effect(KILL cr, TEMP dst);
8900
8901  ins_cost(125);
8902  format %{ "MOVL    $dst.hi, 0\n\t"
8903            "BLSMSKL $dst.lo, $src\n\t"
8904            "JNC     done\n\t"
8905            "BLSMSKL $dst.hi, $src+4\n"
8906            "done:"
8907         %}
8908
8909  ins_encode %{
8910    Label done;
8911    Register Rdst = $dst$$Register;
8912    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8913
8914    __ movl(HIGH_FROM_LOW(Rdst), 0);
8915    __ blsmskl(Rdst, $src$$Address);
8916    __ jccb(Assembler::carryClear, done);
8917    __ blsmskl(HIGH_FROM_LOW(Rdst), src_hi);
8918    __ bind(done);
8919  %}
8920
8921  ins_pipe(ialu_reg_mem);
8922%}
8923
8924instruct blsrL_eReg_eReg(eRegL dst, eRegL src, immL_M1 minus_1, eFlagsReg cr)
8925%{
8926  match(Set dst (AndL (AddL src minus_1) src) );
8927  predicate(UseBMI1Instructions);
8928  effect(KILL cr, TEMP dst);
8929
8930  format %{ "MOVL   $dst.hi, $src.hi\n\t"
8931            "BLSRL  $dst.lo, $src.lo\n\t"
8932            "JNC    done\n\t"
8933            "BLSRL  $dst.hi, $src.hi\n"
8934            "done:"
8935  %}
8936
8937  ins_encode %{
8938    Label done;
8939    Register Rdst = $dst$$Register;
8940    Register Rsrc = $src$$Register;
8941    __ movl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8942    __ blsrl(Rdst, Rsrc);
8943    __ jccb(Assembler::carryClear, done);
8944    __ blsrl(HIGH_FROM_LOW(Rdst), HIGH_FROM_LOW(Rsrc));
8945    __ bind(done);
8946  %}
8947
8948  ins_pipe(ialu_reg);
8949%}
8950
8951instruct blsrL_eReg_mem(eRegL dst, memory src, immL_M1 minus_1, eFlagsReg cr)
8952%{
8953  match(Set dst (AndL (AddL (LoadL src) minus_1) (LoadL src) ));
8954  predicate(UseBMI1Instructions);
8955  effect(KILL cr, TEMP dst);
8956
8957  ins_cost(125);
8958  format %{ "MOVL   $dst.hi, $src+4\n\t"
8959            "BLSRL  $dst.lo, $src\n\t"
8960            "JNC    done\n\t"
8961            "BLSRL  $dst.hi, $src+4\n"
8962            "done:"
8963  %}
8964
8965  ins_encode %{
8966    Label done;
8967    Register Rdst = $dst$$Register;
8968    Address src_hi = Address::make_raw($src$$base, $src$$index, $src$$scale, $src$$disp + 4, relocInfo::none);
8969    __ movl(HIGH_FROM_LOW(Rdst), src_hi);
8970    __ blsrl(Rdst, $src$$Address);
8971    __ jccb(Assembler::carryClear, done);
8972    __ blsrl(HIGH_FROM_LOW(Rdst), src_hi);
8973    __ bind(done);
8974  %}
8975
8976  ins_pipe(ialu_reg_mem);
8977%}
8978
8979// Or Long Register with Register
8980instruct orl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
8981  match(Set dst (OrL dst src));
8982  effect(KILL cr);
8983  format %{ "OR     $dst.lo,$src.lo\n\t"
8984            "OR     $dst.hi,$src.hi" %}
8985  opcode(0x0B,0x0B);
8986  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
8987  ins_pipe( ialu_reg_reg_long );
8988%}
8989
8990// Or Long Register with Immediate
8991instruct orl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
8992  match(Set dst (OrL dst src));
8993  effect(KILL cr);
8994  format %{ "OR     $dst.lo,$src.lo\n\t"
8995            "OR     $dst.hi,$src.hi" %}
8996  opcode(0x81,0x01,0x01);  /* Opcode 81 /1, 81 /1 */
8997  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
8998  ins_pipe( ialu_reg_long );
8999%}
9000
9001// Or Long Register with Memory
9002instruct orl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9003  match(Set dst (OrL dst (LoadL mem)));
9004  effect(KILL cr);
9005  ins_cost(125);
9006  format %{ "OR     $dst.lo,$mem\n\t"
9007            "OR     $dst.hi,$mem+4" %}
9008  opcode(0x0B,0x0B);
9009  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9010  ins_pipe( ialu_reg_long_mem );
9011%}
9012
9013// Xor Long Register with Register
9014instruct xorl_eReg(eRegL dst, eRegL src, eFlagsReg cr) %{
9015  match(Set dst (XorL dst src));
9016  effect(KILL cr);
9017  format %{ "XOR    $dst.lo,$src.lo\n\t"
9018            "XOR    $dst.hi,$src.hi" %}
9019  opcode(0x33,0x33);
9020  ins_encode( RegReg_Lo( dst, src), RegReg_Hi( dst, src) );
9021  ins_pipe( ialu_reg_reg_long );
9022%}
9023
9024// Xor Long Register with Immediate -1
9025instruct xorl_eReg_im1(eRegL dst, immL_M1 imm) %{
9026  match(Set dst (XorL dst imm));
9027  format %{ "NOT    $dst.lo\n\t"
9028            "NOT    $dst.hi" %}
9029  ins_encode %{
9030     __ notl($dst$$Register);
9031     __ notl(HIGH_FROM_LOW($dst$$Register));
9032  %}
9033  ins_pipe( ialu_reg_long );
9034%}
9035
9036// Xor Long Register with Immediate
9037instruct xorl_eReg_imm(eRegL dst, immL src, eFlagsReg cr) %{
9038  match(Set dst (XorL dst src));
9039  effect(KILL cr);
9040  format %{ "XOR    $dst.lo,$src.lo\n\t"
9041            "XOR    $dst.hi,$src.hi" %}
9042  opcode(0x81,0x06,0x06);  /* Opcode 81 /6, 81 /6 */
9043  ins_encode( Long_OpcSErm_Lo( dst, src ), Long_OpcSErm_Hi( dst, src ) );
9044  ins_pipe( ialu_reg_long );
9045%}
9046
9047// Xor Long Register with Memory
9048instruct xorl_eReg_mem(eRegL dst, load_long_memory mem, eFlagsReg cr) %{
9049  match(Set dst (XorL dst (LoadL mem)));
9050  effect(KILL cr);
9051  ins_cost(125);
9052  format %{ "XOR    $dst.lo,$mem\n\t"
9053            "XOR    $dst.hi,$mem+4" %}
9054  opcode(0x33,0x33);
9055  ins_encode( OpcP, RegMem( dst, mem), OpcS, RegMem_Hi(dst,mem) );
9056  ins_pipe( ialu_reg_long_mem );
9057%}
9058
9059// Shift Left Long by 1
9060instruct shlL_eReg_1(eRegL dst, immI_1 cnt, eFlagsReg cr) %{
9061  predicate(UseNewLongLShift);
9062  match(Set dst (LShiftL dst cnt));
9063  effect(KILL cr);
9064  ins_cost(100);
9065  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9066            "ADC    $dst.hi,$dst.hi" %}
9067  ins_encode %{
9068    __ addl($dst$$Register,$dst$$Register);
9069    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9070  %}
9071  ins_pipe( ialu_reg_long );
9072%}
9073
9074// Shift Left Long by 2
9075instruct shlL_eReg_2(eRegL dst, immI_2 cnt, eFlagsReg cr) %{
9076  predicate(UseNewLongLShift);
9077  match(Set dst (LShiftL dst cnt));
9078  effect(KILL cr);
9079  ins_cost(100);
9080  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9081            "ADC    $dst.hi,$dst.hi\n\t"
9082            "ADD    $dst.lo,$dst.lo\n\t"
9083            "ADC    $dst.hi,$dst.hi" %}
9084  ins_encode %{
9085    __ addl($dst$$Register,$dst$$Register);
9086    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9087    __ addl($dst$$Register,$dst$$Register);
9088    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9089  %}
9090  ins_pipe( ialu_reg_long );
9091%}
9092
9093// Shift Left Long by 3
9094instruct shlL_eReg_3(eRegL dst, immI_3 cnt, eFlagsReg cr) %{
9095  predicate(UseNewLongLShift);
9096  match(Set dst (LShiftL dst cnt));
9097  effect(KILL cr);
9098  ins_cost(100);
9099  format %{ "ADD    $dst.lo,$dst.lo\n\t"
9100            "ADC    $dst.hi,$dst.hi\n\t"
9101            "ADD    $dst.lo,$dst.lo\n\t"
9102            "ADC    $dst.hi,$dst.hi\n\t"
9103            "ADD    $dst.lo,$dst.lo\n\t"
9104            "ADC    $dst.hi,$dst.hi" %}
9105  ins_encode %{
9106    __ addl($dst$$Register,$dst$$Register);
9107    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9108    __ addl($dst$$Register,$dst$$Register);
9109    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9110    __ addl($dst$$Register,$dst$$Register);
9111    __ adcl(HIGH_FROM_LOW($dst$$Register),HIGH_FROM_LOW($dst$$Register));
9112  %}
9113  ins_pipe( ialu_reg_long );
9114%}
9115
9116// Shift Left Long by 1-31
9117instruct shlL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9118  match(Set dst (LShiftL dst cnt));
9119  effect(KILL cr);
9120  ins_cost(200);
9121  format %{ "SHLD   $dst.hi,$dst.lo,$cnt\n\t"
9122            "SHL    $dst.lo,$cnt" %}
9123  opcode(0xC1, 0x4, 0xA4);  /* 0F/A4, then C1 /4 ib */
9124  ins_encode( move_long_small_shift(dst,cnt) );
9125  ins_pipe( ialu_reg_long );
9126%}
9127
9128// Shift Left Long by 32-63
9129instruct shlL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9130  match(Set dst (LShiftL dst cnt));
9131  effect(KILL cr);
9132  ins_cost(300);
9133  format %{ "MOV    $dst.hi,$dst.lo\n"
9134          "\tSHL    $dst.hi,$cnt-32\n"
9135          "\tXOR    $dst.lo,$dst.lo" %}
9136  opcode(0xC1, 0x4);  /* C1 /4 ib */
9137  ins_encode( move_long_big_shift_clr(dst,cnt) );
9138  ins_pipe( ialu_reg_long );
9139%}
9140
9141// Shift Left Long by variable
9142instruct salL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9143  match(Set dst (LShiftL dst shift));
9144  effect(KILL cr);
9145  ins_cost(500+200);
9146  size(17);
9147  format %{ "TEST   $shift,32\n\t"
9148            "JEQ,s  small\n\t"
9149            "MOV    $dst.hi,$dst.lo\n\t"
9150            "XOR    $dst.lo,$dst.lo\n"
9151    "small:\tSHLD   $dst.hi,$dst.lo,$shift\n\t"
9152            "SHL    $dst.lo,$shift" %}
9153  ins_encode( shift_left_long( dst, shift ) );
9154  ins_pipe( pipe_slow );
9155%}
9156
9157// Shift Right Long by 1-31
9158instruct shrL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9159  match(Set dst (URShiftL dst cnt));
9160  effect(KILL cr);
9161  ins_cost(200);
9162  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9163            "SHR    $dst.hi,$cnt" %}
9164  opcode(0xC1, 0x5, 0xAC);  /* 0F/AC, then C1 /5 ib */
9165  ins_encode( move_long_small_shift(dst,cnt) );
9166  ins_pipe( ialu_reg_long );
9167%}
9168
9169// Shift Right Long by 32-63
9170instruct shrL_eReg_32_63(eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9171  match(Set dst (URShiftL dst cnt));
9172  effect(KILL cr);
9173  ins_cost(300);
9174  format %{ "MOV    $dst.lo,$dst.hi\n"
9175          "\tSHR    $dst.lo,$cnt-32\n"
9176          "\tXOR    $dst.hi,$dst.hi" %}
9177  opcode(0xC1, 0x5);  /* C1 /5 ib */
9178  ins_encode( move_long_big_shift_clr(dst,cnt) );
9179  ins_pipe( ialu_reg_long );
9180%}
9181
9182// Shift Right Long by variable
9183instruct shrL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9184  match(Set dst (URShiftL dst shift));
9185  effect(KILL cr);
9186  ins_cost(600);
9187  size(17);
9188  format %{ "TEST   $shift,32\n\t"
9189            "JEQ,s  small\n\t"
9190            "MOV    $dst.lo,$dst.hi\n\t"
9191            "XOR    $dst.hi,$dst.hi\n"
9192    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9193            "SHR    $dst.hi,$shift" %}
9194  ins_encode( shift_right_long( dst, shift ) );
9195  ins_pipe( pipe_slow );
9196%}
9197
9198// Shift Right Long by 1-31
9199instruct sarL_eReg_1_31(eRegL dst, immI_1_31 cnt, eFlagsReg cr) %{
9200  match(Set dst (RShiftL dst cnt));
9201  effect(KILL cr);
9202  ins_cost(200);
9203  format %{ "SHRD   $dst.lo,$dst.hi,$cnt\n\t"
9204            "SAR    $dst.hi,$cnt" %}
9205  opcode(0xC1, 0x7, 0xAC);  /* 0F/AC, then C1 /7 ib */
9206  ins_encode( move_long_small_shift(dst,cnt) );
9207  ins_pipe( ialu_reg_long );
9208%}
9209
9210// Shift Right Long by 32-63
9211instruct sarL_eReg_32_63( eRegL dst, immI_32_63 cnt, eFlagsReg cr) %{
9212  match(Set dst (RShiftL dst cnt));
9213  effect(KILL cr);
9214  ins_cost(300);
9215  format %{ "MOV    $dst.lo,$dst.hi\n"
9216          "\tSAR    $dst.lo,$cnt-32\n"
9217          "\tSAR    $dst.hi,31" %}
9218  opcode(0xC1, 0x7);  /* C1 /7 ib */
9219  ins_encode( move_long_big_shift_sign(dst,cnt) );
9220  ins_pipe( ialu_reg_long );
9221%}
9222
9223// Shift Right arithmetic Long by variable
9224instruct sarL_eReg_CL(eRegL dst, eCXRegI shift, eFlagsReg cr) %{
9225  match(Set dst (RShiftL dst shift));
9226  effect(KILL cr);
9227  ins_cost(600);
9228  size(18);
9229  format %{ "TEST   $shift,32\n\t"
9230            "JEQ,s  small\n\t"
9231            "MOV    $dst.lo,$dst.hi\n\t"
9232            "SAR    $dst.hi,31\n"
9233    "small:\tSHRD   $dst.lo,$dst.hi,$shift\n\t"
9234            "SAR    $dst.hi,$shift" %}
9235  ins_encode( shift_right_arith_long( dst, shift ) );
9236  ins_pipe( pipe_slow );
9237%}
9238
9239
9240//----------Double Instructions------------------------------------------------
9241// Double Math
9242
9243// Compare & branch
9244
9245// P6 version of float compare, sets condition codes in EFLAGS
9246instruct cmpDPR_cc_P6(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9247  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9248  match(Set cr (CmpD src1 src2));
9249  effect(KILL rax);
9250  ins_cost(150);
9251  format %{ "FLD    $src1\n\t"
9252            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9253            "JNP    exit\n\t"
9254            "MOV    ah,1       // saw a NaN, set CF\n\t"
9255            "SAHF\n"
9256     "exit:\tNOP               // avoid branch to branch" %}
9257  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9258  ins_encode( Push_Reg_DPR(src1),
9259              OpcP, RegOpc(src2),
9260              cmpF_P6_fixup );
9261  ins_pipe( pipe_slow );
9262%}
9263
9264instruct cmpDPR_cc_P6CF(eFlagsRegUCF cr, regDPR src1, regDPR src2) %{
9265  predicate(VM_Version::supports_cmov() && UseSSE <=1);
9266  match(Set cr (CmpD src1 src2));
9267  ins_cost(150);
9268  format %{ "FLD    $src1\n\t"
9269            "FUCOMIP ST,$src2  // P6 instruction" %}
9270  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9271  ins_encode( Push_Reg_DPR(src1),
9272              OpcP, RegOpc(src2));
9273  ins_pipe( pipe_slow );
9274%}
9275
9276// Compare & branch
9277instruct cmpDPR_cc(eFlagsRegU cr, regDPR src1, regDPR src2, eAXRegI rax) %{
9278  predicate(UseSSE<=1);
9279  match(Set cr (CmpD src1 src2));
9280  effect(KILL rax);
9281  ins_cost(200);
9282  format %{ "FLD    $src1\n\t"
9283            "FCOMp  $src2\n\t"
9284            "FNSTSW AX\n\t"
9285            "TEST   AX,0x400\n\t"
9286            "JZ,s   flags\n\t"
9287            "MOV    AH,1\t# unordered treat as LT\n"
9288    "flags:\tSAHF" %}
9289  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9290  ins_encode( Push_Reg_DPR(src1),
9291              OpcP, RegOpc(src2),
9292              fpu_flags);
9293  ins_pipe( pipe_slow );
9294%}
9295
9296// Compare vs zero into -1,0,1
9297instruct cmpDPR_0(rRegI dst, regDPR src1, immDPR0 zero, eAXRegI rax, eFlagsReg cr) %{
9298  predicate(UseSSE<=1);
9299  match(Set dst (CmpD3 src1 zero));
9300  effect(KILL cr, KILL rax);
9301  ins_cost(280);
9302  format %{ "FTSTD  $dst,$src1" %}
9303  opcode(0xE4, 0xD9);
9304  ins_encode( Push_Reg_DPR(src1),
9305              OpcS, OpcP, PopFPU,
9306              CmpF_Result(dst));
9307  ins_pipe( pipe_slow );
9308%}
9309
9310// Compare into -1,0,1
9311instruct cmpDPR_reg(rRegI dst, regDPR src1, regDPR src2, eAXRegI rax, eFlagsReg cr) %{
9312  predicate(UseSSE<=1);
9313  match(Set dst (CmpD3 src1 src2));
9314  effect(KILL cr, KILL rax);
9315  ins_cost(300);
9316  format %{ "FCMPD  $dst,$src1,$src2" %}
9317  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
9318  ins_encode( Push_Reg_DPR(src1),
9319              OpcP, RegOpc(src2),
9320              CmpF_Result(dst));
9321  ins_pipe( pipe_slow );
9322%}
9323
9324// float compare and set condition codes in EFLAGS by XMM regs
9325instruct cmpD_cc(eFlagsRegU cr, regD src1, regD src2) %{
9326  predicate(UseSSE>=2);
9327  match(Set cr (CmpD src1 src2));
9328  ins_cost(145);
9329  format %{ "UCOMISD $src1,$src2\n\t"
9330            "JNP,s   exit\n\t"
9331            "PUSHF\t# saw NaN, set CF\n\t"
9332            "AND     [rsp], #0xffffff2b\n\t"
9333            "POPF\n"
9334    "exit:" %}
9335  ins_encode %{
9336    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9337    emit_cmpfp_fixup(_masm);
9338  %}
9339  ins_pipe( pipe_slow );
9340%}
9341
9342instruct cmpD_ccCF(eFlagsRegUCF cr, regD src1, regD src2) %{
9343  predicate(UseSSE>=2);
9344  match(Set cr (CmpD src1 src2));
9345  ins_cost(100);
9346  format %{ "UCOMISD $src1,$src2" %}
9347  ins_encode %{
9348    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9349  %}
9350  ins_pipe( pipe_slow );
9351%}
9352
9353// float compare and set condition codes in EFLAGS by XMM regs
9354instruct cmpD_ccmem(eFlagsRegU cr, regD src1, memory src2) %{
9355  predicate(UseSSE>=2);
9356  match(Set cr (CmpD src1 (LoadD src2)));
9357  ins_cost(145);
9358  format %{ "UCOMISD $src1,$src2\n\t"
9359            "JNP,s   exit\n\t"
9360            "PUSHF\t# saw NaN, set CF\n\t"
9361            "AND     [rsp], #0xffffff2b\n\t"
9362            "POPF\n"
9363    "exit:" %}
9364  ins_encode %{
9365    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9366    emit_cmpfp_fixup(_masm);
9367  %}
9368  ins_pipe( pipe_slow );
9369%}
9370
9371instruct cmpD_ccmemCF(eFlagsRegUCF cr, regD src1, memory src2) %{
9372  predicate(UseSSE>=2);
9373  match(Set cr (CmpD src1 (LoadD src2)));
9374  ins_cost(100);
9375  format %{ "UCOMISD $src1,$src2" %}
9376  ins_encode %{
9377    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9378  %}
9379  ins_pipe( pipe_slow );
9380%}
9381
9382// Compare into -1,0,1 in XMM
9383instruct cmpD_reg(xRegI dst, regD src1, regD src2, eFlagsReg cr) %{
9384  predicate(UseSSE>=2);
9385  match(Set dst (CmpD3 src1 src2));
9386  effect(KILL cr);
9387  ins_cost(255);
9388  format %{ "UCOMISD $src1, $src2\n\t"
9389            "MOV     $dst, #-1\n\t"
9390            "JP,s    done\n\t"
9391            "JB,s    done\n\t"
9392            "SETNE   $dst\n\t"
9393            "MOVZB   $dst, $dst\n"
9394    "done:" %}
9395  ins_encode %{
9396    __ ucomisd($src1$$XMMRegister, $src2$$XMMRegister);
9397    emit_cmpfp3(_masm, $dst$$Register);
9398  %}
9399  ins_pipe( pipe_slow );
9400%}
9401
9402// Compare into -1,0,1 in XMM and memory
9403instruct cmpD_regmem(xRegI dst, regD src1, memory src2, eFlagsReg cr) %{
9404  predicate(UseSSE>=2);
9405  match(Set dst (CmpD3 src1 (LoadD src2)));
9406  effect(KILL cr);
9407  ins_cost(275);
9408  format %{ "UCOMISD $src1, $src2\n\t"
9409            "MOV     $dst, #-1\n\t"
9410            "JP,s    done\n\t"
9411            "JB,s    done\n\t"
9412            "SETNE   $dst\n\t"
9413            "MOVZB   $dst, $dst\n"
9414    "done:" %}
9415  ins_encode %{
9416    __ ucomisd($src1$$XMMRegister, $src2$$Address);
9417    emit_cmpfp3(_masm, $dst$$Register);
9418  %}
9419  ins_pipe( pipe_slow );
9420%}
9421
9422
9423instruct subDPR_reg(regDPR dst, regDPR src) %{
9424  predicate (UseSSE <=1);
9425  match(Set dst (SubD dst src));
9426
9427  format %{ "FLD    $src\n\t"
9428            "DSUBp  $dst,ST" %}
9429  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
9430  ins_cost(150);
9431  ins_encode( Push_Reg_DPR(src),
9432              OpcP, RegOpc(dst) );
9433  ins_pipe( fpu_reg_reg );
9434%}
9435
9436instruct subDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9437  predicate (UseSSE <=1);
9438  match(Set dst (RoundDouble (SubD src1 src2)));
9439  ins_cost(250);
9440
9441  format %{ "FLD    $src2\n\t"
9442            "DSUB   ST,$src1\n\t"
9443            "FSTP_D $dst\t# D-round" %}
9444  opcode(0xD8, 0x5);
9445  ins_encode( Push_Reg_DPR(src2),
9446              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9447  ins_pipe( fpu_mem_reg_reg );
9448%}
9449
9450
9451instruct subDPR_reg_mem(regDPR dst, memory src) %{
9452  predicate (UseSSE <=1);
9453  match(Set dst (SubD dst (LoadD src)));
9454  ins_cost(150);
9455
9456  format %{ "FLD    $src\n\t"
9457            "DSUBp  $dst,ST" %}
9458  opcode(0xDE, 0x5, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9459  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9460              OpcP, RegOpc(dst) );
9461  ins_pipe( fpu_reg_mem );
9462%}
9463
9464instruct absDPR_reg(regDPR1 dst, regDPR1 src) %{
9465  predicate (UseSSE<=1);
9466  match(Set dst (AbsD src));
9467  ins_cost(100);
9468  format %{ "FABS" %}
9469  opcode(0xE1, 0xD9);
9470  ins_encode( OpcS, OpcP );
9471  ins_pipe( fpu_reg_reg );
9472%}
9473
9474instruct negDPR_reg(regDPR1 dst, regDPR1 src) %{
9475  predicate(UseSSE<=1);
9476  match(Set dst (NegD src));
9477  ins_cost(100);
9478  format %{ "FCHS" %}
9479  opcode(0xE0, 0xD9);
9480  ins_encode( OpcS, OpcP );
9481  ins_pipe( fpu_reg_reg );
9482%}
9483
9484instruct addDPR_reg(regDPR dst, regDPR src) %{
9485  predicate(UseSSE<=1);
9486  match(Set dst (AddD dst src));
9487  format %{ "FLD    $src\n\t"
9488            "DADD   $dst,ST" %}
9489  size(4);
9490  ins_cost(150);
9491  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
9492  ins_encode( Push_Reg_DPR(src),
9493              OpcP, RegOpc(dst) );
9494  ins_pipe( fpu_reg_reg );
9495%}
9496
9497
9498instruct addDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9499  predicate(UseSSE<=1);
9500  match(Set dst (RoundDouble (AddD src1 src2)));
9501  ins_cost(250);
9502
9503  format %{ "FLD    $src2\n\t"
9504            "DADD   ST,$src1\n\t"
9505            "FSTP_D $dst\t# D-round" %}
9506  opcode(0xD8, 0x0); /* D8 C0+i or D8 /0*/
9507  ins_encode( Push_Reg_DPR(src2),
9508              OpcP, RegOpc(src1), Pop_Mem_DPR(dst) );
9509  ins_pipe( fpu_mem_reg_reg );
9510%}
9511
9512
9513instruct addDPR_reg_mem(regDPR dst, memory src) %{
9514  predicate(UseSSE<=1);
9515  match(Set dst (AddD dst (LoadD src)));
9516  ins_cost(150);
9517
9518  format %{ "FLD    $src\n\t"
9519            "DADDp  $dst,ST" %}
9520  opcode(0xDE, 0x0, 0xDD); /* DE C0+i */  /* LoadD  DD /0 */
9521  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9522              OpcP, RegOpc(dst) );
9523  ins_pipe( fpu_reg_mem );
9524%}
9525
9526// add-to-memory
9527instruct addDPR_mem_reg(memory dst, regDPR src) %{
9528  predicate(UseSSE<=1);
9529  match(Set dst (StoreD dst (RoundDouble (AddD (LoadD dst) src))));
9530  ins_cost(150);
9531
9532  format %{ "FLD_D  $dst\n\t"
9533            "DADD   ST,$src\n\t"
9534            "FST_D  $dst" %}
9535  opcode(0xDD, 0x0);
9536  ins_encode( Opcode(0xDD), RMopc_Mem(0x00,dst),
9537              Opcode(0xD8), RegOpc(src),
9538              set_instruction_start,
9539              Opcode(0xDD), RMopc_Mem(0x03,dst) );
9540  ins_pipe( fpu_reg_mem );
9541%}
9542
9543instruct addDPR_reg_imm1(regDPR dst, immDPR1 con) %{
9544  predicate(UseSSE<=1);
9545  match(Set dst (AddD dst con));
9546  ins_cost(125);
9547  format %{ "FLD1\n\t"
9548            "DADDp  $dst,ST" %}
9549  ins_encode %{
9550    __ fld1();
9551    __ faddp($dst$$reg);
9552  %}
9553  ins_pipe(fpu_reg);
9554%}
9555
9556instruct addDPR_reg_imm(regDPR dst, immDPR con) %{
9557  predicate(UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9558  match(Set dst (AddD dst con));
9559  ins_cost(200);
9560  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9561            "DADDp  $dst,ST" %}
9562  ins_encode %{
9563    __ fld_d($constantaddress($con));
9564    __ faddp($dst$$reg);
9565  %}
9566  ins_pipe(fpu_reg_mem);
9567%}
9568
9569instruct addDPR_reg_imm_round(stackSlotD dst, regDPR src, immDPR con) %{
9570  predicate(UseSSE<=1 && _kids[0]->_kids[1]->_leaf->getd() != 0.0 && _kids[0]->_kids[1]->_leaf->getd() != 1.0 );
9571  match(Set dst (RoundDouble (AddD src con)));
9572  ins_cost(200);
9573  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9574            "DADD   ST,$src\n\t"
9575            "FSTP_D $dst\t# D-round" %}
9576  ins_encode %{
9577    __ fld_d($constantaddress($con));
9578    __ fadd($src$$reg);
9579    __ fstp_d(Address(rsp, $dst$$disp));
9580  %}
9581  ins_pipe(fpu_mem_reg_con);
9582%}
9583
9584instruct mulDPR_reg(regDPR dst, regDPR src) %{
9585  predicate(UseSSE<=1);
9586  match(Set dst (MulD dst src));
9587  format %{ "FLD    $src\n\t"
9588            "DMULp  $dst,ST" %}
9589  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9590  ins_cost(150);
9591  ins_encode( Push_Reg_DPR(src),
9592              OpcP, RegOpc(dst) );
9593  ins_pipe( fpu_reg_reg );
9594%}
9595
9596// Strict FP instruction biases argument before multiply then
9597// biases result to avoid double rounding of subnormals.
9598//
9599// scale arg1 by multiplying arg1 by 2^(-15360)
9600// load arg2
9601// multiply scaled arg1 by arg2
9602// rescale product by 2^(15360)
9603//
9604instruct strictfp_mulDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9605  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9606  match(Set dst (MulD dst src));
9607  ins_cost(1);   // Select this instruction for all strict FP double multiplies
9608
9609  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9610            "DMULp  $dst,ST\n\t"
9611            "FLD    $src\n\t"
9612            "DMULp  $dst,ST\n\t"
9613            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9614            "DMULp  $dst,ST\n\t" %}
9615  opcode(0xDE, 0x1); /* DE C8+i or DE /1*/
9616  ins_encode( strictfp_bias1(dst),
9617              Push_Reg_DPR(src),
9618              OpcP, RegOpc(dst),
9619              strictfp_bias2(dst) );
9620  ins_pipe( fpu_reg_reg );
9621%}
9622
9623instruct mulDPR_reg_imm(regDPR dst, immDPR con) %{
9624  predicate( UseSSE<=1 && _kids[1]->_leaf->getd() != 0.0 && _kids[1]->_leaf->getd() != 1.0 );
9625  match(Set dst (MulD dst con));
9626  ins_cost(200);
9627  format %{ "FLD_D  [$constantaddress]\t# load from constant table: double=$con\n\t"
9628            "DMULp  $dst,ST" %}
9629  ins_encode %{
9630    __ fld_d($constantaddress($con));
9631    __ fmulp($dst$$reg);
9632  %}
9633  ins_pipe(fpu_reg_mem);
9634%}
9635
9636
9637instruct mulDPR_reg_mem(regDPR dst, memory src) %{
9638  predicate( UseSSE<=1 );
9639  match(Set dst (MulD dst (LoadD src)));
9640  ins_cost(200);
9641  format %{ "FLD_D  $src\n\t"
9642            "DMULp  $dst,ST" %}
9643  opcode(0xDE, 0x1, 0xDD); /* DE C8+i or DE /1*/  /* LoadD  DD /0 */
9644  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
9645              OpcP, RegOpc(dst) );
9646  ins_pipe( fpu_reg_mem );
9647%}
9648
9649//
9650// Cisc-alternate to reg-reg multiply
9651instruct mulDPR_reg_mem_cisc(regDPR dst, regDPR src, memory mem) %{
9652  predicate( UseSSE<=1 );
9653  match(Set dst (MulD src (LoadD mem)));
9654  ins_cost(250);
9655  format %{ "FLD_D  $mem\n\t"
9656            "DMUL   ST,$src\n\t"
9657            "FSTP_D $dst" %}
9658  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadD D9 /0 */
9659  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem),
9660              OpcReg_FPR(src),
9661              Pop_Reg_DPR(dst) );
9662  ins_pipe( fpu_reg_reg_mem );
9663%}
9664
9665
9666// MACRO3 -- addDPR a mulDPR
9667// This instruction is a '2-address' instruction in that the result goes
9668// back to src2.  This eliminates a move from the macro; possibly the
9669// register allocator will have to add it back (and maybe not).
9670instruct addDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9671  predicate( UseSSE<=1 );
9672  match(Set src2 (AddD (MulD src0 src1) src2));
9673  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9674            "DMUL   ST,$src1\n\t"
9675            "DADDp  $src2,ST" %}
9676  ins_cost(250);
9677  opcode(0xDD); /* LoadD DD /0 */
9678  ins_encode( Push_Reg_FPR(src0),
9679              FMul_ST_reg(src1),
9680              FAddP_reg_ST(src2) );
9681  ins_pipe( fpu_reg_reg_reg );
9682%}
9683
9684
9685// MACRO3 -- subDPR a mulDPR
9686instruct subDPR_mulDPR_reg(regDPR src2, regDPR src1, regDPR src0) %{
9687  predicate( UseSSE<=1 );
9688  match(Set src2 (SubD (MulD src0 src1) src2));
9689  format %{ "FLD    $src0\t# ===MACRO3d===\n\t"
9690            "DMUL   ST,$src1\n\t"
9691            "DSUBRp $src2,ST" %}
9692  ins_cost(250);
9693  ins_encode( Push_Reg_FPR(src0),
9694              FMul_ST_reg(src1),
9695              Opcode(0xDE), Opc_plus(0xE0,src2));
9696  ins_pipe( fpu_reg_reg_reg );
9697%}
9698
9699
9700instruct divDPR_reg(regDPR dst, regDPR src) %{
9701  predicate( UseSSE<=1 );
9702  match(Set dst (DivD dst src));
9703
9704  format %{ "FLD    $src\n\t"
9705            "FDIVp  $dst,ST" %}
9706  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9707  ins_cost(150);
9708  ins_encode( Push_Reg_DPR(src),
9709              OpcP, RegOpc(dst) );
9710  ins_pipe( fpu_reg_reg );
9711%}
9712
9713// Strict FP instruction biases argument before division then
9714// biases result, to avoid double rounding of subnormals.
9715//
9716// scale dividend by multiplying dividend by 2^(-15360)
9717// load divisor
9718// divide scaled dividend by divisor
9719// rescale quotient by 2^(15360)
9720//
9721instruct strictfp_divDPR_reg(regDPR1 dst, regnotDPR1 src) %{
9722  predicate (UseSSE<=1);
9723  match(Set dst (DivD dst src));
9724  predicate( UseSSE<=1 && Compile::current()->has_method() && Compile::current()->method()->is_strict() );
9725  ins_cost(01);
9726
9727  format %{ "FLD    StubRoutines::_fpu_subnormal_bias1\n\t"
9728            "DMULp  $dst,ST\n\t"
9729            "FLD    $src\n\t"
9730            "FDIVp  $dst,ST\n\t"
9731            "FLD    StubRoutines::_fpu_subnormal_bias2\n\t"
9732            "DMULp  $dst,ST\n\t" %}
9733  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
9734  ins_encode( strictfp_bias1(dst),
9735              Push_Reg_DPR(src),
9736              OpcP, RegOpc(dst),
9737              strictfp_bias2(dst) );
9738  ins_pipe( fpu_reg_reg );
9739%}
9740
9741instruct divDPR_reg_round(stackSlotD dst, regDPR src1, regDPR src2) %{
9742  predicate( UseSSE<=1 && !(Compile::current()->has_method() && Compile::current()->method()->is_strict()) );
9743  match(Set dst (RoundDouble (DivD src1 src2)));
9744
9745  format %{ "FLD    $src1\n\t"
9746            "FDIV   ST,$src2\n\t"
9747            "FSTP_D $dst\t# D-round" %}
9748  opcode(0xD8, 0x6); /* D8 F0+i or D8 /6 */
9749  ins_encode( Push_Reg_DPR(src1),
9750              OpcP, RegOpc(src2), Pop_Mem_DPR(dst) );
9751  ins_pipe( fpu_mem_reg_reg );
9752%}
9753
9754
9755instruct modDPR_reg(regDPR dst, regDPR src, eAXRegI rax, eFlagsReg cr) %{
9756  predicate(UseSSE<=1);
9757  match(Set dst (ModD dst src));
9758  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
9759
9760  format %{ "DMOD   $dst,$src" %}
9761  ins_cost(250);
9762  ins_encode(Push_Reg_Mod_DPR(dst, src),
9763              emitModDPR(),
9764              Push_Result_Mod_DPR(src),
9765              Pop_Reg_DPR(dst));
9766  ins_pipe( pipe_slow );
9767%}
9768
9769instruct modD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eFlagsReg cr) %{
9770  predicate(UseSSE>=2);
9771  match(Set dst (ModD src0 src1));
9772  effect(KILL rax, KILL cr);
9773
9774  format %{ "SUB    ESP,8\t # DMOD\n"
9775          "\tMOVSD  [ESP+0],$src1\n"
9776          "\tFLD_D  [ESP+0]\n"
9777          "\tMOVSD  [ESP+0],$src0\n"
9778          "\tFLD_D  [ESP+0]\n"
9779     "loop:\tFPREM\n"
9780          "\tFWAIT\n"
9781          "\tFNSTSW AX\n"
9782          "\tSAHF\n"
9783          "\tJP     loop\n"
9784          "\tFSTP_D [ESP+0]\n"
9785          "\tMOVSD  $dst,[ESP+0]\n"
9786          "\tADD    ESP,8\n"
9787          "\tFSTP   ST0\t # Restore FPU Stack"
9788    %}
9789  ins_cost(250);
9790  ins_encode( Push_ModD_encoding(src0, src1), emitModDPR(), Push_ResultD(dst), PopFPU);
9791  ins_pipe( pipe_slow );
9792%}
9793
9794instruct sinDPR_reg(regDPR1 dst, regDPR1 src) %{
9795  predicate (UseSSE<=1);
9796  match(Set dst (SinD src));
9797  ins_cost(1800);
9798  format %{ "DSIN   $dst" %}
9799  opcode(0xD9, 0xFE);
9800  ins_encode( OpcP, OpcS );
9801  ins_pipe( pipe_slow );
9802%}
9803
9804instruct sinD_reg(regD dst, eFlagsReg cr) %{
9805  predicate (UseSSE>=2);
9806  match(Set dst (SinD dst));
9807  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9808  ins_cost(1800);
9809  format %{ "DSIN   $dst" %}
9810  opcode(0xD9, 0xFE);
9811  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9812  ins_pipe( pipe_slow );
9813%}
9814
9815instruct cosDPR_reg(regDPR1 dst, regDPR1 src) %{
9816  predicate (UseSSE<=1);
9817  match(Set dst (CosD src));
9818  ins_cost(1800);
9819  format %{ "DCOS   $dst" %}
9820  opcode(0xD9, 0xFF);
9821  ins_encode( OpcP, OpcS );
9822  ins_pipe( pipe_slow );
9823%}
9824
9825instruct cosD_reg(regD dst, eFlagsReg cr) %{
9826  predicate (UseSSE>=2);
9827  match(Set dst (CosD dst));
9828  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9829  ins_cost(1800);
9830  format %{ "DCOS   $dst" %}
9831  opcode(0xD9, 0xFF);
9832  ins_encode( Push_SrcD(dst), OpcP, OpcS, Push_ResultD(dst) );
9833  ins_pipe( pipe_slow );
9834%}
9835
9836instruct tanDPR_reg(regDPR1 dst, regDPR1 src) %{
9837  predicate (UseSSE<=1);
9838  match(Set dst(TanD src));
9839  format %{ "DTAN   $dst" %}
9840  ins_encode( Opcode(0xD9), Opcode(0xF2),    // fptan
9841              Opcode(0xDD), Opcode(0xD8));   // fstp st
9842  ins_pipe( pipe_slow );
9843%}
9844
9845instruct tanD_reg(regD dst, eFlagsReg cr) %{
9846  predicate (UseSSE>=2);
9847  match(Set dst(TanD dst));
9848  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9849  format %{ "DTAN   $dst" %}
9850  ins_encode( Push_SrcD(dst),
9851              Opcode(0xD9), Opcode(0xF2),    // fptan
9852              Opcode(0xDD), Opcode(0xD8),   // fstp st
9853              Push_ResultD(dst) );
9854  ins_pipe( pipe_slow );
9855%}
9856
9857instruct atanDPR_reg(regDPR dst, regDPR src) %{
9858  predicate (UseSSE<=1);
9859  match(Set dst(AtanD dst src));
9860  format %{ "DATA   $dst,$src" %}
9861  opcode(0xD9, 0xF3);
9862  ins_encode( Push_Reg_DPR(src),
9863              OpcP, OpcS, RegOpc(dst) );
9864  ins_pipe( pipe_slow );
9865%}
9866
9867instruct atanD_reg(regD dst, regD src, eFlagsReg cr) %{
9868  predicate (UseSSE>=2);
9869  match(Set dst(AtanD dst src));
9870  effect(KILL cr); // Push_{Src|Result}D() uses "{SUB|ADD} ESP,8"
9871  format %{ "DATA   $dst,$src" %}
9872  opcode(0xD9, 0xF3);
9873  ins_encode( Push_SrcD(src),
9874              OpcP, OpcS, Push_ResultD(dst) );
9875  ins_pipe( pipe_slow );
9876%}
9877
9878instruct sqrtDPR_reg(regDPR dst, regDPR src) %{
9879  predicate (UseSSE<=1);
9880  match(Set dst (SqrtD src));
9881  format %{ "DSQRT  $dst,$src" %}
9882  opcode(0xFA, 0xD9);
9883  ins_encode( Push_Reg_DPR(src),
9884              OpcS, OpcP, Pop_Reg_DPR(dst) );
9885  ins_pipe( pipe_slow );
9886%}
9887
9888instruct powDPR_reg(regDPR X, regDPR1 Y, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9889  predicate (UseSSE<=1);
9890  match(Set Y (PowD X Y));  // Raise X to the Yth power
9891  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9892  format %{ "fast_pow $X $Y -> $Y  // KILL $rax, $rcx, $rdx" %}
9893  ins_encode %{
9894    __ subptr(rsp, 8);
9895    __ fld_s($X$$reg - 1);
9896    __ fast_pow();
9897    __ addptr(rsp, 8);
9898  %}
9899  ins_pipe( pipe_slow );
9900%}
9901
9902instruct powD_reg(regD dst, regD src0, regD src1, eAXRegI rax, eDXRegI rdx, eCXRegI rcx, eFlagsReg cr) %{
9903  predicate (UseSSE>=2);
9904  match(Set dst (PowD src0 src1));  // Raise src0 to the src1'th power
9905  effect(KILL rax, KILL rdx, KILL rcx, KILL cr);
9906  format %{ "fast_pow $src0 $src1 -> $dst  // KILL $rax, $rcx, $rdx" %}
9907  ins_encode %{
9908    __ subptr(rsp, 8);
9909    __ movdbl(Address(rsp, 0), $src1$$XMMRegister);
9910    __ fld_d(Address(rsp, 0));
9911    __ movdbl(Address(rsp, 0), $src0$$XMMRegister);
9912    __ fld_d(Address(rsp, 0));
9913    __ fast_pow();
9914    __ fstp_d(Address(rsp, 0));
9915    __ movdbl($dst$$XMMRegister, Address(rsp, 0));
9916    __ addptr(rsp, 8);
9917  %}
9918  ins_pipe( pipe_slow );
9919%}
9920
9921instruct log10DPR_reg(regDPR1 dst, regDPR1 src) %{
9922  predicate (UseSSE<=1);
9923  // The source Double operand on FPU stack
9924  match(Set dst (Log10D src));
9925  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9926  // fxch         ; swap ST(0) with ST(1)
9927  // fyl2x        ; compute log_10(2) * log_2(x)
9928  format %{ "FLDLG2 \t\t\t#Log10\n\t"
9929            "FXCH   \n\t"
9930            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9931         %}
9932  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9933              Opcode(0xD9), Opcode(0xC9),   // fxch
9934              Opcode(0xD9), Opcode(0xF1));  // fyl2x
9935
9936  ins_pipe( pipe_slow );
9937%}
9938
9939instruct log10D_reg(regD dst, regD src, eFlagsReg cr) %{
9940  predicate (UseSSE>=2);
9941  effect(KILL cr);
9942  match(Set dst (Log10D src));
9943  // fldlg2       ; push log_10(2) on the FPU stack; full 80-bit number
9944  // fyl2x        ; compute log_10(2) * log_2(x)
9945  format %{ "FLDLG2 \t\t\t#Log10\n\t"
9946            "FYL2X  \t\t\t# Q=Log10*Log_2(x)"
9947         %}
9948  ins_encode( Opcode(0xD9), Opcode(0xEC),   // fldlg2
9949              Push_SrcD(src),
9950              Opcode(0xD9), Opcode(0xF1),   // fyl2x
9951              Push_ResultD(dst));
9952
9953  ins_pipe( pipe_slow );
9954%}
9955
9956//-------------Float Instructions-------------------------------
9957// Float Math
9958
9959// Code for float compare:
9960//     fcompp();
9961//     fwait(); fnstsw_ax();
9962//     sahf();
9963//     movl(dst, unordered_result);
9964//     jcc(Assembler::parity, exit);
9965//     movl(dst, less_result);
9966//     jcc(Assembler::below, exit);
9967//     movl(dst, equal_result);
9968//     jcc(Assembler::equal, exit);
9969//     movl(dst, greater_result);
9970//   exit:
9971
9972// P6 version of float compare, sets condition codes in EFLAGS
9973instruct cmpFPR_cc_P6(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
9974  predicate(VM_Version::supports_cmov() && UseSSE == 0);
9975  match(Set cr (CmpF src1 src2));
9976  effect(KILL rax);
9977  ins_cost(150);
9978  format %{ "FLD    $src1\n\t"
9979            "FUCOMIP ST,$src2  // P6 instruction\n\t"
9980            "JNP    exit\n\t"
9981            "MOV    ah,1       // saw a NaN, set CF (treat as LT)\n\t"
9982            "SAHF\n"
9983     "exit:\tNOP               // avoid branch to branch" %}
9984  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9985  ins_encode( Push_Reg_DPR(src1),
9986              OpcP, RegOpc(src2),
9987              cmpF_P6_fixup );
9988  ins_pipe( pipe_slow );
9989%}
9990
9991instruct cmpFPR_cc_P6CF(eFlagsRegUCF cr, regFPR src1, regFPR src2) %{
9992  predicate(VM_Version::supports_cmov() && UseSSE == 0);
9993  match(Set cr (CmpF src1 src2));
9994  ins_cost(100);
9995  format %{ "FLD    $src1\n\t"
9996            "FUCOMIP ST,$src2  // P6 instruction" %}
9997  opcode(0xDF, 0x05); /* DF E8+i or DF /5 */
9998  ins_encode( Push_Reg_DPR(src1),
9999              OpcP, RegOpc(src2));
10000  ins_pipe( pipe_slow );
10001%}
10002
10003
10004// Compare & branch
10005instruct cmpFPR_cc(eFlagsRegU cr, regFPR src1, regFPR src2, eAXRegI rax) %{
10006  predicate(UseSSE == 0);
10007  match(Set cr (CmpF src1 src2));
10008  effect(KILL rax);
10009  ins_cost(200);
10010  format %{ "FLD    $src1\n\t"
10011            "FCOMp  $src2\n\t"
10012            "FNSTSW AX\n\t"
10013            "TEST   AX,0x400\n\t"
10014            "JZ,s   flags\n\t"
10015            "MOV    AH,1\t# unordered treat as LT\n"
10016    "flags:\tSAHF" %}
10017  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10018  ins_encode( Push_Reg_DPR(src1),
10019              OpcP, RegOpc(src2),
10020              fpu_flags);
10021  ins_pipe( pipe_slow );
10022%}
10023
10024// Compare vs zero into -1,0,1
10025instruct cmpFPR_0(rRegI dst, regFPR src1, immFPR0 zero, eAXRegI rax, eFlagsReg cr) %{
10026  predicate(UseSSE == 0);
10027  match(Set dst (CmpF3 src1 zero));
10028  effect(KILL cr, KILL rax);
10029  ins_cost(280);
10030  format %{ "FTSTF  $dst,$src1" %}
10031  opcode(0xE4, 0xD9);
10032  ins_encode( Push_Reg_DPR(src1),
10033              OpcS, OpcP, PopFPU,
10034              CmpF_Result(dst));
10035  ins_pipe( pipe_slow );
10036%}
10037
10038// Compare into -1,0,1
10039instruct cmpFPR_reg(rRegI dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10040  predicate(UseSSE == 0);
10041  match(Set dst (CmpF3 src1 src2));
10042  effect(KILL cr, KILL rax);
10043  ins_cost(300);
10044  format %{ "FCMPF  $dst,$src1,$src2" %}
10045  opcode(0xD8, 0x3); /* D8 D8+i or D8 /3 */
10046  ins_encode( Push_Reg_DPR(src1),
10047              OpcP, RegOpc(src2),
10048              CmpF_Result(dst));
10049  ins_pipe( pipe_slow );
10050%}
10051
10052// float compare and set condition codes in EFLAGS by XMM regs
10053instruct cmpF_cc(eFlagsRegU cr, regF src1, regF src2) %{
10054  predicate(UseSSE>=1);
10055  match(Set cr (CmpF src1 src2));
10056  ins_cost(145);
10057  format %{ "UCOMISS $src1,$src2\n\t"
10058            "JNP,s   exit\n\t"
10059            "PUSHF\t# saw NaN, set CF\n\t"
10060            "AND     [rsp], #0xffffff2b\n\t"
10061            "POPF\n"
10062    "exit:" %}
10063  ins_encode %{
10064    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10065    emit_cmpfp_fixup(_masm);
10066  %}
10067  ins_pipe( pipe_slow );
10068%}
10069
10070instruct cmpF_ccCF(eFlagsRegUCF cr, regF src1, regF src2) %{
10071  predicate(UseSSE>=1);
10072  match(Set cr (CmpF src1 src2));
10073  ins_cost(100);
10074  format %{ "UCOMISS $src1,$src2" %}
10075  ins_encode %{
10076    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10077  %}
10078  ins_pipe( pipe_slow );
10079%}
10080
10081// float compare and set condition codes in EFLAGS by XMM regs
10082instruct cmpF_ccmem(eFlagsRegU cr, regF src1, memory src2) %{
10083  predicate(UseSSE>=1);
10084  match(Set cr (CmpF src1 (LoadF src2)));
10085  ins_cost(165);
10086  format %{ "UCOMISS $src1,$src2\n\t"
10087            "JNP,s   exit\n\t"
10088            "PUSHF\t# saw NaN, set CF\n\t"
10089            "AND     [rsp], #0xffffff2b\n\t"
10090            "POPF\n"
10091    "exit:" %}
10092  ins_encode %{
10093    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10094    emit_cmpfp_fixup(_masm);
10095  %}
10096  ins_pipe( pipe_slow );
10097%}
10098
10099instruct cmpF_ccmemCF(eFlagsRegUCF cr, regF src1, memory src2) %{
10100  predicate(UseSSE>=1);
10101  match(Set cr (CmpF src1 (LoadF src2)));
10102  ins_cost(100);
10103  format %{ "UCOMISS $src1,$src2" %}
10104  ins_encode %{
10105    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10106  %}
10107  ins_pipe( pipe_slow );
10108%}
10109
10110// Compare into -1,0,1 in XMM
10111instruct cmpF_reg(xRegI dst, regF src1, regF src2, eFlagsReg cr) %{
10112  predicate(UseSSE>=1);
10113  match(Set dst (CmpF3 src1 src2));
10114  effect(KILL cr);
10115  ins_cost(255);
10116  format %{ "UCOMISS $src1, $src2\n\t"
10117            "MOV     $dst, #-1\n\t"
10118            "JP,s    done\n\t"
10119            "JB,s    done\n\t"
10120            "SETNE   $dst\n\t"
10121            "MOVZB   $dst, $dst\n"
10122    "done:" %}
10123  ins_encode %{
10124    __ ucomiss($src1$$XMMRegister, $src2$$XMMRegister);
10125    emit_cmpfp3(_masm, $dst$$Register);
10126  %}
10127  ins_pipe( pipe_slow );
10128%}
10129
10130// Compare into -1,0,1 in XMM and memory
10131instruct cmpF_regmem(xRegI dst, regF src1, memory src2, eFlagsReg cr) %{
10132  predicate(UseSSE>=1);
10133  match(Set dst (CmpF3 src1 (LoadF src2)));
10134  effect(KILL cr);
10135  ins_cost(275);
10136  format %{ "UCOMISS $src1, $src2\n\t"
10137            "MOV     $dst, #-1\n\t"
10138            "JP,s    done\n\t"
10139            "JB,s    done\n\t"
10140            "SETNE   $dst\n\t"
10141            "MOVZB   $dst, $dst\n"
10142    "done:" %}
10143  ins_encode %{
10144    __ ucomiss($src1$$XMMRegister, $src2$$Address);
10145    emit_cmpfp3(_masm, $dst$$Register);
10146  %}
10147  ins_pipe( pipe_slow );
10148%}
10149
10150// Spill to obtain 24-bit precision
10151instruct subFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10152  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10153  match(Set dst (SubF src1 src2));
10154
10155  format %{ "FSUB   $dst,$src1 - $src2" %}
10156  opcode(0xD8, 0x4); /* D8 E0+i or D8 /4 mod==0x3 ;; result in TOS */
10157  ins_encode( Push_Reg_FPR(src1),
10158              OpcReg_FPR(src2),
10159              Pop_Mem_FPR(dst) );
10160  ins_pipe( fpu_mem_reg_reg );
10161%}
10162//
10163// This instruction does not round to 24-bits
10164instruct subFPR_reg(regFPR dst, regFPR src) %{
10165  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10166  match(Set dst (SubF dst src));
10167
10168  format %{ "FSUB   $dst,$src" %}
10169  opcode(0xDE, 0x5); /* DE E8+i  or DE /5 */
10170  ins_encode( Push_Reg_FPR(src),
10171              OpcP, RegOpc(dst) );
10172  ins_pipe( fpu_reg_reg );
10173%}
10174
10175// Spill to obtain 24-bit precision
10176instruct addFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10177  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10178  match(Set dst (AddF src1 src2));
10179
10180  format %{ "FADD   $dst,$src1,$src2" %}
10181  opcode(0xD8, 0x0); /* D8 C0+i */
10182  ins_encode( Push_Reg_FPR(src2),
10183              OpcReg_FPR(src1),
10184              Pop_Mem_FPR(dst) );
10185  ins_pipe( fpu_mem_reg_reg );
10186%}
10187//
10188// This instruction does not round to 24-bits
10189instruct addFPR_reg(regFPR dst, regFPR src) %{
10190  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10191  match(Set dst (AddF dst src));
10192
10193  format %{ "FLD    $src\n\t"
10194            "FADDp  $dst,ST" %}
10195  opcode(0xDE, 0x0); /* DE C0+i or DE /0*/
10196  ins_encode( Push_Reg_FPR(src),
10197              OpcP, RegOpc(dst) );
10198  ins_pipe( fpu_reg_reg );
10199%}
10200
10201instruct absFPR_reg(regFPR1 dst, regFPR1 src) %{
10202  predicate(UseSSE==0);
10203  match(Set dst (AbsF src));
10204  ins_cost(100);
10205  format %{ "FABS" %}
10206  opcode(0xE1, 0xD9);
10207  ins_encode( OpcS, OpcP );
10208  ins_pipe( fpu_reg_reg );
10209%}
10210
10211instruct negFPR_reg(regFPR1 dst, regFPR1 src) %{
10212  predicate(UseSSE==0);
10213  match(Set dst (NegF src));
10214  ins_cost(100);
10215  format %{ "FCHS" %}
10216  opcode(0xE0, 0xD9);
10217  ins_encode( OpcS, OpcP );
10218  ins_pipe( fpu_reg_reg );
10219%}
10220
10221// Cisc-alternate to addFPR_reg
10222// Spill to obtain 24-bit precision
10223instruct addFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10224  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10225  match(Set dst (AddF src1 (LoadF src2)));
10226
10227  format %{ "FLD    $src2\n\t"
10228            "FADD   ST,$src1\n\t"
10229            "FSTP_S $dst" %}
10230  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10231  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10232              OpcReg_FPR(src1),
10233              Pop_Mem_FPR(dst) );
10234  ins_pipe( fpu_mem_reg_mem );
10235%}
10236//
10237// Cisc-alternate to addFPR_reg
10238// This instruction does not round to 24-bits
10239instruct addFPR_reg_mem(regFPR dst, memory src) %{
10240  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10241  match(Set dst (AddF dst (LoadF src)));
10242
10243  format %{ "FADD   $dst,$src" %}
10244  opcode(0xDE, 0x0, 0xD9); /* DE C0+i or DE /0*/  /* LoadF  D9 /0 */
10245  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src),
10246              OpcP, RegOpc(dst) );
10247  ins_pipe( fpu_reg_mem );
10248%}
10249
10250// // Following two instructions for _222_mpegaudio
10251// Spill to obtain 24-bit precision
10252instruct addFPR24_mem_reg(stackSlotF dst, regFPR src2, memory src1 ) %{
10253  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10254  match(Set dst (AddF src1 src2));
10255
10256  format %{ "FADD   $dst,$src1,$src2" %}
10257  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10258  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src1),
10259              OpcReg_FPR(src2),
10260              Pop_Mem_FPR(dst) );
10261  ins_pipe( fpu_mem_reg_mem );
10262%}
10263
10264// Cisc-spill variant
10265// Spill to obtain 24-bit precision
10266instruct addFPR24_mem_cisc(stackSlotF dst, memory src1, memory src2) %{
10267  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10268  match(Set dst (AddF src1 (LoadF src2)));
10269
10270  format %{ "FADD   $dst,$src1,$src2 cisc" %}
10271  opcode(0xD8, 0x0, 0xD9); /* D8 C0+i */  /* LoadF  D9 /0 */
10272  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10273              set_instruction_start,
10274              OpcP, RMopc_Mem(secondary,src1),
10275              Pop_Mem_FPR(dst) );
10276  ins_pipe( fpu_mem_mem_mem );
10277%}
10278
10279// Spill to obtain 24-bit precision
10280instruct addFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10281  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10282  match(Set dst (AddF src1 src2));
10283
10284  format %{ "FADD   $dst,$src1,$src2" %}
10285  opcode(0xD8, 0x0, 0xD9); /* D8 /0 */  /* LoadF  D9 /0 */
10286  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10287              set_instruction_start,
10288              OpcP, RMopc_Mem(secondary,src1),
10289              Pop_Mem_FPR(dst) );
10290  ins_pipe( fpu_mem_mem_mem );
10291%}
10292
10293
10294// Spill to obtain 24-bit precision
10295instruct addFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10296  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10297  match(Set dst (AddF src con));
10298  format %{ "FLD    $src\n\t"
10299            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10300            "FSTP_S $dst"  %}
10301  ins_encode %{
10302    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10303    __ fadd_s($constantaddress($con));
10304    __ fstp_s(Address(rsp, $dst$$disp));
10305  %}
10306  ins_pipe(fpu_mem_reg_con);
10307%}
10308//
10309// This instruction does not round to 24-bits
10310instruct addFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10311  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10312  match(Set dst (AddF src con));
10313  format %{ "FLD    $src\n\t"
10314            "FADD_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10315            "FSTP   $dst"  %}
10316  ins_encode %{
10317    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10318    __ fadd_s($constantaddress($con));
10319    __ fstp_d($dst$$reg);
10320  %}
10321  ins_pipe(fpu_reg_reg_con);
10322%}
10323
10324// Spill to obtain 24-bit precision
10325instruct mulFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10326  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10327  match(Set dst (MulF src1 src2));
10328
10329  format %{ "FLD    $src1\n\t"
10330            "FMUL   $src2\n\t"
10331            "FSTP_S $dst"  %}
10332  opcode(0xD8, 0x1); /* D8 C8+i or D8 /1 ;; result in TOS */
10333  ins_encode( Push_Reg_FPR(src1),
10334              OpcReg_FPR(src2),
10335              Pop_Mem_FPR(dst) );
10336  ins_pipe( fpu_mem_reg_reg );
10337%}
10338//
10339// This instruction does not round to 24-bits
10340instruct mulFPR_reg(regFPR dst, regFPR src1, regFPR src2) %{
10341  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10342  match(Set dst (MulF src1 src2));
10343
10344  format %{ "FLD    $src1\n\t"
10345            "FMUL   $src2\n\t"
10346            "FSTP_S $dst"  %}
10347  opcode(0xD8, 0x1); /* D8 C8+i */
10348  ins_encode( Push_Reg_FPR(src2),
10349              OpcReg_FPR(src1),
10350              Pop_Reg_FPR(dst) );
10351  ins_pipe( fpu_reg_reg_reg );
10352%}
10353
10354
10355// Spill to obtain 24-bit precision
10356// Cisc-alternate to reg-reg multiply
10357instruct mulFPR24_reg_mem(stackSlotF dst, regFPR src1, memory src2) %{
10358  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10359  match(Set dst (MulF src1 (LoadF src2)));
10360
10361  format %{ "FLD_S  $src2\n\t"
10362            "FMUL   $src1\n\t"
10363            "FSTP_S $dst"  %}
10364  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or DE /1*/  /* LoadF D9 /0 */
10365  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10366              OpcReg_FPR(src1),
10367              Pop_Mem_FPR(dst) );
10368  ins_pipe( fpu_mem_reg_mem );
10369%}
10370//
10371// This instruction does not round to 24-bits
10372// Cisc-alternate to reg-reg multiply
10373instruct mulFPR_reg_mem(regFPR dst, regFPR src1, memory src2) %{
10374  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10375  match(Set dst (MulF src1 (LoadF src2)));
10376
10377  format %{ "FMUL   $dst,$src1,$src2" %}
10378  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i */  /* LoadF D9 /0 */
10379  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10380              OpcReg_FPR(src1),
10381              Pop_Reg_FPR(dst) );
10382  ins_pipe( fpu_reg_reg_mem );
10383%}
10384
10385// Spill to obtain 24-bit precision
10386instruct mulFPR24_mem_mem(stackSlotF dst, memory src1, memory src2) %{
10387  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10388  match(Set dst (MulF src1 src2));
10389
10390  format %{ "FMUL   $dst,$src1,$src2" %}
10391  opcode(0xD8, 0x1, 0xD9); /* D8 /1 */  /* LoadF D9 /0 */
10392  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,src2),
10393              set_instruction_start,
10394              OpcP, RMopc_Mem(secondary,src1),
10395              Pop_Mem_FPR(dst) );
10396  ins_pipe( fpu_mem_mem_mem );
10397%}
10398
10399// Spill to obtain 24-bit precision
10400instruct mulFPR24_reg_imm(stackSlotF dst, regFPR src, immFPR con) %{
10401  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10402  match(Set dst (MulF src con));
10403
10404  format %{ "FLD    $src\n\t"
10405            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10406            "FSTP_S $dst"  %}
10407  ins_encode %{
10408    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10409    __ fmul_s($constantaddress($con));
10410    __ fstp_s(Address(rsp, $dst$$disp));
10411  %}
10412  ins_pipe(fpu_mem_reg_con);
10413%}
10414//
10415// This instruction does not round to 24-bits
10416instruct mulFPR_reg_imm(regFPR dst, regFPR src, immFPR con) %{
10417  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10418  match(Set dst (MulF src con));
10419
10420  format %{ "FLD    $src\n\t"
10421            "FMUL_S [$constantaddress]\t# load from constant table: float=$con\n\t"
10422            "FSTP   $dst"  %}
10423  ins_encode %{
10424    __ fld_s($src$$reg - 1);  // FLD ST(i-1)
10425    __ fmul_s($constantaddress($con));
10426    __ fstp_d($dst$$reg);
10427  %}
10428  ins_pipe(fpu_reg_reg_con);
10429%}
10430
10431
10432//
10433// MACRO1 -- subsume unshared load into mulFPR
10434// This instruction does not round to 24-bits
10435instruct mulFPR_reg_load1(regFPR dst, regFPR src, memory mem1 ) %{
10436  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10437  match(Set dst (MulF (LoadF mem1) src));
10438
10439  format %{ "FLD    $mem1    ===MACRO1===\n\t"
10440            "FMUL   ST,$src\n\t"
10441            "FSTP   $dst" %}
10442  opcode(0xD8, 0x1, 0xD9); /* D8 C8+i or D8 /1 */  /* LoadF D9 /0 */
10443  ins_encode( Opcode(tertiary), RMopc_Mem(0x00,mem1),
10444              OpcReg_FPR(src),
10445              Pop_Reg_FPR(dst) );
10446  ins_pipe( fpu_reg_reg_mem );
10447%}
10448//
10449// MACRO2 -- addFPR a mulFPR which subsumed an unshared load
10450// This instruction does not round to 24-bits
10451instruct addFPR_mulFPR_reg_load1(regFPR dst, memory mem1, regFPR src1, regFPR src2) %{
10452  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10453  match(Set dst (AddF (MulF (LoadF mem1) src1) src2));
10454  ins_cost(95);
10455
10456  format %{ "FLD    $mem1     ===MACRO2===\n\t"
10457            "FMUL   ST,$src1  subsume mulFPR left load\n\t"
10458            "FADD   ST,$src2\n\t"
10459            "FSTP   $dst" %}
10460  opcode(0xD9); /* LoadF D9 /0 */
10461  ins_encode( OpcP, RMopc_Mem(0x00,mem1),
10462              FMul_ST_reg(src1),
10463              FAdd_ST_reg(src2),
10464              Pop_Reg_FPR(dst) );
10465  ins_pipe( fpu_reg_mem_reg_reg );
10466%}
10467
10468// MACRO3 -- addFPR a mulFPR
10469// This instruction does not round to 24-bits.  It is a '2-address'
10470// instruction in that the result goes back to src2.  This eliminates
10471// a move from the macro; possibly the register allocator will have
10472// to add it back (and maybe not).
10473instruct addFPR_mulFPR_reg(regFPR src2, regFPR src1, regFPR src0) %{
10474  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10475  match(Set src2 (AddF (MulF src0 src1) src2));
10476
10477  format %{ "FLD    $src0     ===MACRO3===\n\t"
10478            "FMUL   ST,$src1\n\t"
10479            "FADDP  $src2,ST" %}
10480  opcode(0xD9); /* LoadF D9 /0 */
10481  ins_encode( Push_Reg_FPR(src0),
10482              FMul_ST_reg(src1),
10483              FAddP_reg_ST(src2) );
10484  ins_pipe( fpu_reg_reg_reg );
10485%}
10486
10487// MACRO4 -- divFPR subFPR
10488// This instruction does not round to 24-bits
10489instruct subFPR_divFPR_reg(regFPR dst, regFPR src1, regFPR src2, regFPR src3) %{
10490  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10491  match(Set dst (DivF (SubF src2 src1) src3));
10492
10493  format %{ "FLD    $src2   ===MACRO4===\n\t"
10494            "FSUB   ST,$src1\n\t"
10495            "FDIV   ST,$src3\n\t"
10496            "FSTP  $dst" %}
10497  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10498  ins_encode( Push_Reg_FPR(src2),
10499              subFPR_divFPR_encode(src1,src3),
10500              Pop_Reg_FPR(dst) );
10501  ins_pipe( fpu_reg_reg_reg_reg );
10502%}
10503
10504// Spill to obtain 24-bit precision
10505instruct divFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2) %{
10506  predicate(UseSSE==0 && Compile::current()->select_24_bit_instr());
10507  match(Set dst (DivF src1 src2));
10508
10509  format %{ "FDIV   $dst,$src1,$src2" %}
10510  opcode(0xD8, 0x6); /* D8 F0+i or DE /6*/
10511  ins_encode( Push_Reg_FPR(src1),
10512              OpcReg_FPR(src2),
10513              Pop_Mem_FPR(dst) );
10514  ins_pipe( fpu_mem_reg_reg );
10515%}
10516//
10517// This instruction does not round to 24-bits
10518instruct divFPR_reg(regFPR dst, regFPR src) %{
10519  predicate(UseSSE==0 && !Compile::current()->select_24_bit_instr());
10520  match(Set dst (DivF dst src));
10521
10522  format %{ "FDIV   $dst,$src" %}
10523  opcode(0xDE, 0x7); /* DE F8+i or DE /7*/
10524  ins_encode( Push_Reg_FPR(src),
10525              OpcP, RegOpc(dst) );
10526  ins_pipe( fpu_reg_reg );
10527%}
10528
10529
10530// Spill to obtain 24-bit precision
10531instruct modFPR24_reg(stackSlotF dst, regFPR src1, regFPR src2, eAXRegI rax, eFlagsReg cr) %{
10532  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
10533  match(Set dst (ModF src1 src2));
10534  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10535
10536  format %{ "FMOD   $dst,$src1,$src2" %}
10537  ins_encode( Push_Reg_Mod_DPR(src1, src2),
10538              emitModDPR(),
10539              Push_Result_Mod_DPR(src2),
10540              Pop_Mem_FPR(dst));
10541  ins_pipe( pipe_slow );
10542%}
10543//
10544// This instruction does not round to 24-bits
10545instruct modFPR_reg(regFPR dst, regFPR src, eAXRegI rax, eFlagsReg cr) %{
10546  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
10547  match(Set dst (ModF dst src));
10548  effect(KILL rax, KILL cr); // emitModDPR() uses EAX and EFLAGS
10549
10550  format %{ "FMOD   $dst,$src" %}
10551  ins_encode(Push_Reg_Mod_DPR(dst, src),
10552              emitModDPR(),
10553              Push_Result_Mod_DPR(src),
10554              Pop_Reg_FPR(dst));
10555  ins_pipe( pipe_slow );
10556%}
10557
10558instruct modF_reg(regF dst, regF src0, regF src1, eAXRegI rax, eFlagsReg cr) %{
10559  predicate(UseSSE>=1);
10560  match(Set dst (ModF src0 src1));
10561  effect(KILL rax, KILL cr);
10562  format %{ "SUB    ESP,4\t # FMOD\n"
10563          "\tMOVSS  [ESP+0],$src1\n"
10564          "\tFLD_S  [ESP+0]\n"
10565          "\tMOVSS  [ESP+0],$src0\n"
10566          "\tFLD_S  [ESP+0]\n"
10567     "loop:\tFPREM\n"
10568          "\tFWAIT\n"
10569          "\tFNSTSW AX\n"
10570          "\tSAHF\n"
10571          "\tJP     loop\n"
10572          "\tFSTP_S [ESP+0]\n"
10573          "\tMOVSS  $dst,[ESP+0]\n"
10574          "\tADD    ESP,4\n"
10575          "\tFSTP   ST0\t # Restore FPU Stack"
10576    %}
10577  ins_cost(250);
10578  ins_encode( Push_ModF_encoding(src0, src1), emitModDPR(), Push_ResultF(dst,0x4), PopFPU);
10579  ins_pipe( pipe_slow );
10580%}
10581
10582
10583//----------Arithmetic Conversion Instructions---------------------------------
10584// The conversions operations are all Alpha sorted.  Please keep it that way!
10585
10586instruct roundFloat_mem_reg(stackSlotF dst, regFPR src) %{
10587  predicate(UseSSE==0);
10588  match(Set dst (RoundFloat src));
10589  ins_cost(125);
10590  format %{ "FST_S  $dst,$src\t# F-round" %}
10591  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
10592  ins_pipe( fpu_mem_reg );
10593%}
10594
10595instruct roundDouble_mem_reg(stackSlotD dst, regDPR src) %{
10596  predicate(UseSSE<=1);
10597  match(Set dst (RoundDouble src));
10598  ins_cost(125);
10599  format %{ "FST_D  $dst,$src\t# D-round" %}
10600  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
10601  ins_pipe( fpu_mem_reg );
10602%}
10603
10604// Force rounding to 24-bit precision and 6-bit exponent
10605instruct convDPR2FPR_reg(stackSlotF dst, regDPR src) %{
10606  predicate(UseSSE==0);
10607  match(Set dst (ConvD2F src));
10608  format %{ "FST_S  $dst,$src\t# F-round" %}
10609  expand %{
10610    roundFloat_mem_reg(dst,src);
10611  %}
10612%}
10613
10614// Force rounding to 24-bit precision and 6-bit exponent
10615instruct convDPR2F_reg(regF dst, regDPR src, eFlagsReg cr) %{
10616  predicate(UseSSE==1);
10617  match(Set dst (ConvD2F src));
10618  effect( KILL cr );
10619  format %{ "SUB    ESP,4\n\t"
10620            "FST_S  [ESP],$src\t# F-round\n\t"
10621            "MOVSS  $dst,[ESP]\n\t"
10622            "ADD ESP,4" %}
10623  ins_encode %{
10624    __ subptr(rsp, 4);
10625    if ($src$$reg != FPR1L_enc) {
10626      __ fld_s($src$$reg-1);
10627      __ fstp_s(Address(rsp, 0));
10628    } else {
10629      __ fst_s(Address(rsp, 0));
10630    }
10631    __ movflt($dst$$XMMRegister, Address(rsp, 0));
10632    __ addptr(rsp, 4);
10633  %}
10634  ins_pipe( pipe_slow );
10635%}
10636
10637// Force rounding double precision to single precision
10638instruct convD2F_reg(regF dst, regD src) %{
10639  predicate(UseSSE>=2);
10640  match(Set dst (ConvD2F src));
10641  format %{ "CVTSD2SS $dst,$src\t# F-round" %}
10642  ins_encode %{
10643    __ cvtsd2ss ($dst$$XMMRegister, $src$$XMMRegister);
10644  %}
10645  ins_pipe( pipe_slow );
10646%}
10647
10648instruct convFPR2DPR_reg_reg(regDPR dst, regFPR src) %{
10649  predicate(UseSSE==0);
10650  match(Set dst (ConvF2D src));
10651  format %{ "FST_S  $dst,$src\t# D-round" %}
10652  ins_encode( Pop_Reg_Reg_DPR(dst, src));
10653  ins_pipe( fpu_reg_reg );
10654%}
10655
10656instruct convFPR2D_reg(stackSlotD dst, regFPR src) %{
10657  predicate(UseSSE==1);
10658  match(Set dst (ConvF2D src));
10659  format %{ "FST_D  $dst,$src\t# D-round" %}
10660  expand %{
10661    roundDouble_mem_reg(dst,src);
10662  %}
10663%}
10664
10665instruct convF2DPR_reg(regDPR dst, regF src, eFlagsReg cr) %{
10666  predicate(UseSSE==1);
10667  match(Set dst (ConvF2D src));
10668  effect( KILL cr );
10669  format %{ "SUB    ESP,4\n\t"
10670            "MOVSS  [ESP] $src\n\t"
10671            "FLD_S  [ESP]\n\t"
10672            "ADD    ESP,4\n\t"
10673            "FSTP   $dst\t# D-round" %}
10674  ins_encode %{
10675    __ subptr(rsp, 4);
10676    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10677    __ fld_s(Address(rsp, 0));
10678    __ addptr(rsp, 4);
10679    __ fstp_d($dst$$reg);
10680  %}
10681  ins_pipe( pipe_slow );
10682%}
10683
10684instruct convF2D_reg(regD dst, regF src) %{
10685  predicate(UseSSE>=2);
10686  match(Set dst (ConvF2D src));
10687  format %{ "CVTSS2SD $dst,$src\t# D-round" %}
10688  ins_encode %{
10689    __ cvtss2sd ($dst$$XMMRegister, $src$$XMMRegister);
10690  %}
10691  ins_pipe( pipe_slow );
10692%}
10693
10694// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10695instruct convDPR2I_reg_reg( eAXRegI dst, eDXRegI tmp, regDPR src, eFlagsReg cr ) %{
10696  predicate(UseSSE<=1);
10697  match(Set dst (ConvD2I src));
10698  effect( KILL tmp, KILL cr );
10699  format %{ "FLD    $src\t# Convert double to int \n\t"
10700            "FLDCW  trunc mode\n\t"
10701            "SUB    ESP,4\n\t"
10702            "FISTp  [ESP + #0]\n\t"
10703            "FLDCW  std/24-bit mode\n\t"
10704            "POP    EAX\n\t"
10705            "CMP    EAX,0x80000000\n\t"
10706            "JNE,s  fast\n\t"
10707            "FLD_D  $src\n\t"
10708            "CALL   d2i_wrapper\n"
10709      "fast:" %}
10710  ins_encode( Push_Reg_DPR(src), DPR2I_encoding(src) );
10711  ins_pipe( pipe_slow );
10712%}
10713
10714// Convert a double to an int.  If the double is a NAN, stuff a zero in instead.
10715instruct convD2I_reg_reg( eAXRegI dst, eDXRegI tmp, regD src, eFlagsReg cr ) %{
10716  predicate(UseSSE>=2);
10717  match(Set dst (ConvD2I src));
10718  effect( KILL tmp, KILL cr );
10719  format %{ "CVTTSD2SI $dst, $src\n\t"
10720            "CMP    $dst,0x80000000\n\t"
10721            "JNE,s  fast\n\t"
10722            "SUB    ESP, 8\n\t"
10723            "MOVSD  [ESP], $src\n\t"
10724            "FLD_D  [ESP]\n\t"
10725            "ADD    ESP, 8\n\t"
10726            "CALL   d2i_wrapper\n"
10727      "fast:" %}
10728  ins_encode %{
10729    Label fast;
10730    __ cvttsd2sil($dst$$Register, $src$$XMMRegister);
10731    __ cmpl($dst$$Register, 0x80000000);
10732    __ jccb(Assembler::notEqual, fast);
10733    __ subptr(rsp, 8);
10734    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10735    __ fld_d(Address(rsp, 0));
10736    __ addptr(rsp, 8);
10737    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10738    __ bind(fast);
10739  %}
10740  ins_pipe( pipe_slow );
10741%}
10742
10743instruct convDPR2L_reg_reg( eADXRegL dst, regDPR src, eFlagsReg cr ) %{
10744  predicate(UseSSE<=1);
10745  match(Set dst (ConvD2L src));
10746  effect( KILL cr );
10747  format %{ "FLD    $src\t# Convert double to long\n\t"
10748            "FLDCW  trunc mode\n\t"
10749            "SUB    ESP,8\n\t"
10750            "FISTp  [ESP + #0]\n\t"
10751            "FLDCW  std/24-bit mode\n\t"
10752            "POP    EAX\n\t"
10753            "POP    EDX\n\t"
10754            "CMP    EDX,0x80000000\n\t"
10755            "JNE,s  fast\n\t"
10756            "TEST   EAX,EAX\n\t"
10757            "JNE,s  fast\n\t"
10758            "FLD    $src\n\t"
10759            "CALL   d2l_wrapper\n"
10760      "fast:" %}
10761  ins_encode( Push_Reg_DPR(src),  DPR2L_encoding(src) );
10762  ins_pipe( pipe_slow );
10763%}
10764
10765// XMM lacks a float/double->long conversion, so use the old FPU stack.
10766instruct convD2L_reg_reg( eADXRegL dst, regD src, eFlagsReg cr ) %{
10767  predicate (UseSSE>=2);
10768  match(Set dst (ConvD2L src));
10769  effect( KILL cr );
10770  format %{ "SUB    ESP,8\t# Convert double to long\n\t"
10771            "MOVSD  [ESP],$src\n\t"
10772            "FLD_D  [ESP]\n\t"
10773            "FLDCW  trunc mode\n\t"
10774            "FISTp  [ESP + #0]\n\t"
10775            "FLDCW  std/24-bit mode\n\t"
10776            "POP    EAX\n\t"
10777            "POP    EDX\n\t"
10778            "CMP    EDX,0x80000000\n\t"
10779            "JNE,s  fast\n\t"
10780            "TEST   EAX,EAX\n\t"
10781            "JNE,s  fast\n\t"
10782            "SUB    ESP,8\n\t"
10783            "MOVSD  [ESP],$src\n\t"
10784            "FLD_D  [ESP]\n\t"
10785            "ADD    ESP,8\n\t"
10786            "CALL   d2l_wrapper\n"
10787      "fast:" %}
10788  ins_encode %{
10789    Label fast;
10790    __ subptr(rsp, 8);
10791    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10792    __ fld_d(Address(rsp, 0));
10793    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10794    __ fistp_d(Address(rsp, 0));
10795    // Restore the rounding mode, mask the exception
10796    if (Compile::current()->in_24_bit_fp_mode()) {
10797      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10798    } else {
10799      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10800    }
10801    // Load the converted long, adjust CPU stack
10802    __ pop(rax);
10803    __ pop(rdx);
10804    __ cmpl(rdx, 0x80000000);
10805    __ jccb(Assembler::notEqual, fast);
10806    __ testl(rax, rax);
10807    __ jccb(Assembler::notEqual, fast);
10808    __ subptr(rsp, 8);
10809    __ movdbl(Address(rsp, 0), $src$$XMMRegister);
10810    __ fld_d(Address(rsp, 0));
10811    __ addptr(rsp, 8);
10812    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10813    __ bind(fast);
10814  %}
10815  ins_pipe( pipe_slow );
10816%}
10817
10818// Convert a double to an int.  Java semantics require we do complex
10819// manglations in the corner cases.  So we set the rounding mode to
10820// 'zero', store the darned double down as an int, and reset the
10821// rounding mode to 'nearest'.  The hardware stores a flag value down
10822// if we would overflow or converted a NAN; we check for this and
10823// and go the slow path if needed.
10824instruct convFPR2I_reg_reg(eAXRegI dst, eDXRegI tmp, regFPR src, eFlagsReg cr ) %{
10825  predicate(UseSSE==0);
10826  match(Set dst (ConvF2I src));
10827  effect( KILL tmp, KILL cr );
10828  format %{ "FLD    $src\t# Convert float to int \n\t"
10829            "FLDCW  trunc mode\n\t"
10830            "SUB    ESP,4\n\t"
10831            "FISTp  [ESP + #0]\n\t"
10832            "FLDCW  std/24-bit mode\n\t"
10833            "POP    EAX\n\t"
10834            "CMP    EAX,0x80000000\n\t"
10835            "JNE,s  fast\n\t"
10836            "FLD    $src\n\t"
10837            "CALL   d2i_wrapper\n"
10838      "fast:" %}
10839  // DPR2I_encoding works for FPR2I
10840  ins_encode( Push_Reg_FPR(src), DPR2I_encoding(src) );
10841  ins_pipe( pipe_slow );
10842%}
10843
10844// Convert a float in xmm to an int reg.
10845instruct convF2I_reg(eAXRegI dst, eDXRegI tmp, regF src, eFlagsReg cr ) %{
10846  predicate(UseSSE>=1);
10847  match(Set dst (ConvF2I src));
10848  effect( KILL tmp, KILL cr );
10849  format %{ "CVTTSS2SI $dst, $src\n\t"
10850            "CMP    $dst,0x80000000\n\t"
10851            "JNE,s  fast\n\t"
10852            "SUB    ESP, 4\n\t"
10853            "MOVSS  [ESP], $src\n\t"
10854            "FLD    [ESP]\n\t"
10855            "ADD    ESP, 4\n\t"
10856            "CALL   d2i_wrapper\n"
10857      "fast:" %}
10858  ins_encode %{
10859    Label fast;
10860    __ cvttss2sil($dst$$Register, $src$$XMMRegister);
10861    __ cmpl($dst$$Register, 0x80000000);
10862    __ jccb(Assembler::notEqual, fast);
10863    __ subptr(rsp, 4);
10864    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10865    __ fld_s(Address(rsp, 0));
10866    __ addptr(rsp, 4);
10867    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2i_wrapper())));
10868    __ bind(fast);
10869  %}
10870  ins_pipe( pipe_slow );
10871%}
10872
10873instruct convFPR2L_reg_reg( eADXRegL dst, regFPR src, eFlagsReg cr ) %{
10874  predicate(UseSSE==0);
10875  match(Set dst (ConvF2L src));
10876  effect( KILL cr );
10877  format %{ "FLD    $src\t# Convert float to long\n\t"
10878            "FLDCW  trunc mode\n\t"
10879            "SUB    ESP,8\n\t"
10880            "FISTp  [ESP + #0]\n\t"
10881            "FLDCW  std/24-bit mode\n\t"
10882            "POP    EAX\n\t"
10883            "POP    EDX\n\t"
10884            "CMP    EDX,0x80000000\n\t"
10885            "JNE,s  fast\n\t"
10886            "TEST   EAX,EAX\n\t"
10887            "JNE,s  fast\n\t"
10888            "FLD    $src\n\t"
10889            "CALL   d2l_wrapper\n"
10890      "fast:" %}
10891  // DPR2L_encoding works for FPR2L
10892  ins_encode( Push_Reg_FPR(src), DPR2L_encoding(src) );
10893  ins_pipe( pipe_slow );
10894%}
10895
10896// XMM lacks a float/double->long conversion, so use the old FPU stack.
10897instruct convF2L_reg_reg( eADXRegL dst, regF src, eFlagsReg cr ) %{
10898  predicate (UseSSE>=1);
10899  match(Set dst (ConvF2L src));
10900  effect( KILL cr );
10901  format %{ "SUB    ESP,8\t# Convert float to long\n\t"
10902            "MOVSS  [ESP],$src\n\t"
10903            "FLD_S  [ESP]\n\t"
10904            "FLDCW  trunc mode\n\t"
10905            "FISTp  [ESP + #0]\n\t"
10906            "FLDCW  std/24-bit mode\n\t"
10907            "POP    EAX\n\t"
10908            "POP    EDX\n\t"
10909            "CMP    EDX,0x80000000\n\t"
10910            "JNE,s  fast\n\t"
10911            "TEST   EAX,EAX\n\t"
10912            "JNE,s  fast\n\t"
10913            "SUB    ESP,4\t# Convert float to long\n\t"
10914            "MOVSS  [ESP],$src\n\t"
10915            "FLD_S  [ESP]\n\t"
10916            "ADD    ESP,4\n\t"
10917            "CALL   d2l_wrapper\n"
10918      "fast:" %}
10919  ins_encode %{
10920    Label fast;
10921    __ subptr(rsp, 8);
10922    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10923    __ fld_s(Address(rsp, 0));
10924    __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_trunc()));
10925    __ fistp_d(Address(rsp, 0));
10926    // Restore the rounding mode, mask the exception
10927    if (Compile::current()->in_24_bit_fp_mode()) {
10928      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_24()));
10929    } else {
10930      __ fldcw(ExternalAddress(StubRoutines::addr_fpu_cntrl_wrd_std()));
10931    }
10932    // Load the converted long, adjust CPU stack
10933    __ pop(rax);
10934    __ pop(rdx);
10935    __ cmpl(rdx, 0x80000000);
10936    __ jccb(Assembler::notEqual, fast);
10937    __ testl(rax, rax);
10938    __ jccb(Assembler::notEqual, fast);
10939    __ subptr(rsp, 4);
10940    __ movflt(Address(rsp, 0), $src$$XMMRegister);
10941    __ fld_s(Address(rsp, 0));
10942    __ addptr(rsp, 4);
10943    __ call(RuntimeAddress(CAST_FROM_FN_PTR(address, StubRoutines::d2l_wrapper())));
10944    __ bind(fast);
10945  %}
10946  ins_pipe( pipe_slow );
10947%}
10948
10949instruct convI2DPR_reg(regDPR dst, stackSlotI src) %{
10950  predicate( UseSSE<=1 );
10951  match(Set dst (ConvI2D src));
10952  format %{ "FILD   $src\n\t"
10953            "FSTP   $dst" %}
10954  opcode(0xDB, 0x0);  /* DB /0 */
10955  ins_encode(Push_Mem_I(src), Pop_Reg_DPR(dst));
10956  ins_pipe( fpu_reg_mem );
10957%}
10958
10959instruct convI2D_reg(regD dst, rRegI src) %{
10960  predicate( UseSSE>=2 && !UseXmmI2D );
10961  match(Set dst (ConvI2D src));
10962  format %{ "CVTSI2SD $dst,$src" %}
10963  ins_encode %{
10964    __ cvtsi2sdl ($dst$$XMMRegister, $src$$Register);
10965  %}
10966  ins_pipe( pipe_slow );
10967%}
10968
10969instruct convI2D_mem(regD dst, memory mem) %{
10970  predicate( UseSSE>=2 );
10971  match(Set dst (ConvI2D (LoadI mem)));
10972  format %{ "CVTSI2SD $dst,$mem" %}
10973  ins_encode %{
10974    __ cvtsi2sdl ($dst$$XMMRegister, $mem$$Address);
10975  %}
10976  ins_pipe( pipe_slow );
10977%}
10978
10979instruct convXI2D_reg(regD dst, rRegI src)
10980%{
10981  predicate( UseSSE>=2 && UseXmmI2D );
10982  match(Set dst (ConvI2D src));
10983
10984  format %{ "MOVD  $dst,$src\n\t"
10985            "CVTDQ2PD $dst,$dst\t# i2d" %}
10986  ins_encode %{
10987    __ movdl($dst$$XMMRegister, $src$$Register);
10988    __ cvtdq2pd($dst$$XMMRegister, $dst$$XMMRegister);
10989  %}
10990  ins_pipe(pipe_slow); // XXX
10991%}
10992
10993instruct convI2DPR_mem(regDPR dst, memory mem) %{
10994  predicate( UseSSE<=1 && !Compile::current()->select_24_bit_instr());
10995  match(Set dst (ConvI2D (LoadI mem)));
10996  format %{ "FILD   $mem\n\t"
10997            "FSTP   $dst" %}
10998  opcode(0xDB);      /* DB /0 */
10999  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11000              Pop_Reg_DPR(dst));
11001  ins_pipe( fpu_reg_mem );
11002%}
11003
11004// Convert a byte to a float; no rounding step needed.
11005instruct conv24I2FPR_reg(regFPR dst, stackSlotI src) %{
11006  predicate( UseSSE==0 && n->in(1)->Opcode() == Op_AndI && n->in(1)->in(2)->is_Con() && n->in(1)->in(2)->get_int() == 255 );
11007  match(Set dst (ConvI2F src));
11008  format %{ "FILD   $src\n\t"
11009            "FSTP   $dst" %}
11010
11011  opcode(0xDB, 0x0);  /* DB /0 */
11012  ins_encode(Push_Mem_I(src), Pop_Reg_FPR(dst));
11013  ins_pipe( fpu_reg_mem );
11014%}
11015
11016// In 24-bit mode, force exponent rounding by storing back out
11017instruct convI2FPR_SSF(stackSlotF dst, stackSlotI src) %{
11018  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11019  match(Set dst (ConvI2F src));
11020  ins_cost(200);
11021  format %{ "FILD   $src\n\t"
11022            "FSTP_S $dst" %}
11023  opcode(0xDB, 0x0);  /* DB /0 */
11024  ins_encode( Push_Mem_I(src),
11025              Pop_Mem_FPR(dst));
11026  ins_pipe( fpu_mem_mem );
11027%}
11028
11029// In 24-bit mode, force exponent rounding by storing back out
11030instruct convI2FPR_SSF_mem(stackSlotF dst, memory mem) %{
11031  predicate( UseSSE==0 && Compile::current()->select_24_bit_instr());
11032  match(Set dst (ConvI2F (LoadI mem)));
11033  ins_cost(200);
11034  format %{ "FILD   $mem\n\t"
11035            "FSTP_S $dst" %}
11036  opcode(0xDB);  /* DB /0 */
11037  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11038              Pop_Mem_FPR(dst));
11039  ins_pipe( fpu_mem_mem );
11040%}
11041
11042// This instruction does not round to 24-bits
11043instruct convI2FPR_reg(regFPR dst, stackSlotI src) %{
11044  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11045  match(Set dst (ConvI2F src));
11046  format %{ "FILD   $src\n\t"
11047            "FSTP   $dst" %}
11048  opcode(0xDB, 0x0);  /* DB /0 */
11049  ins_encode( Push_Mem_I(src),
11050              Pop_Reg_FPR(dst));
11051  ins_pipe( fpu_reg_mem );
11052%}
11053
11054// This instruction does not round to 24-bits
11055instruct convI2FPR_mem(regFPR dst, memory mem) %{
11056  predicate( UseSSE==0 && !Compile::current()->select_24_bit_instr());
11057  match(Set dst (ConvI2F (LoadI mem)));
11058  format %{ "FILD   $mem\n\t"
11059            "FSTP   $dst" %}
11060  opcode(0xDB);      /* DB /0 */
11061  ins_encode( OpcP, RMopc_Mem(0x00,mem),
11062              Pop_Reg_FPR(dst));
11063  ins_pipe( fpu_reg_mem );
11064%}
11065
11066// Convert an int to a float in xmm; no rounding step needed.
11067instruct convI2F_reg(regF dst, rRegI src) %{
11068  predicate( UseSSE==1 || UseSSE>=2 && !UseXmmI2F );
11069  match(Set dst (ConvI2F src));
11070  format %{ "CVTSI2SS $dst, $src" %}
11071  ins_encode %{
11072    __ cvtsi2ssl ($dst$$XMMRegister, $src$$Register);
11073  %}
11074  ins_pipe( pipe_slow );
11075%}
11076
11077 instruct convXI2F_reg(regF dst, rRegI src)
11078%{
11079  predicate( UseSSE>=2 && UseXmmI2F );
11080  match(Set dst (ConvI2F src));
11081
11082  format %{ "MOVD  $dst,$src\n\t"
11083            "CVTDQ2PS $dst,$dst\t# i2f" %}
11084  ins_encode %{
11085    __ movdl($dst$$XMMRegister, $src$$Register);
11086    __ cvtdq2ps($dst$$XMMRegister, $dst$$XMMRegister);
11087  %}
11088  ins_pipe(pipe_slow); // XXX
11089%}
11090
11091instruct convI2L_reg( eRegL dst, rRegI src, eFlagsReg cr) %{
11092  match(Set dst (ConvI2L src));
11093  effect(KILL cr);
11094  ins_cost(375);
11095  format %{ "MOV    $dst.lo,$src\n\t"
11096            "MOV    $dst.hi,$src\n\t"
11097            "SAR    $dst.hi,31" %}
11098  ins_encode(convert_int_long(dst,src));
11099  ins_pipe( ialu_reg_reg_long );
11100%}
11101
11102// Zero-extend convert int to long
11103instruct convI2L_reg_zex(eRegL dst, rRegI src, immL_32bits mask, eFlagsReg flags ) %{
11104  match(Set dst (AndL (ConvI2L src) mask) );
11105  effect( KILL flags );
11106  ins_cost(250);
11107  format %{ "MOV    $dst.lo,$src\n\t"
11108            "XOR    $dst.hi,$dst.hi" %}
11109  opcode(0x33); // XOR
11110  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11111  ins_pipe( ialu_reg_reg_long );
11112%}
11113
11114// Zero-extend long
11115instruct zerox_long(eRegL dst, eRegL src, immL_32bits mask, eFlagsReg flags ) %{
11116  match(Set dst (AndL src mask) );
11117  effect( KILL flags );
11118  ins_cost(250);
11119  format %{ "MOV    $dst.lo,$src.lo\n\t"
11120            "XOR    $dst.hi,$dst.hi\n\t" %}
11121  opcode(0x33); // XOR
11122  ins_encode(enc_Copy(dst,src), OpcP, RegReg_Hi2(dst,dst) );
11123  ins_pipe( ialu_reg_reg_long );
11124%}
11125
11126instruct convL2DPR_reg( stackSlotD dst, eRegL src, eFlagsReg cr) %{
11127  predicate (UseSSE<=1);
11128  match(Set dst (ConvL2D src));
11129  effect( KILL cr );
11130  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11131            "PUSH   $src.lo\n\t"
11132            "FILD   ST,[ESP + #0]\n\t"
11133            "ADD    ESP,8\n\t"
11134            "FSTP_D $dst\t# D-round" %}
11135  opcode(0xDF, 0x5);  /* DF /5 */
11136  ins_encode(convert_long_double(src), Pop_Mem_DPR(dst));
11137  ins_pipe( pipe_slow );
11138%}
11139
11140instruct convL2D_reg( regD dst, eRegL src, eFlagsReg cr) %{
11141  predicate (UseSSE>=2);
11142  match(Set dst (ConvL2D src));
11143  effect( KILL cr );
11144  format %{ "PUSH   $src.hi\t# Convert long to double\n\t"
11145            "PUSH   $src.lo\n\t"
11146            "FILD_D [ESP]\n\t"
11147            "FSTP_D [ESP]\n\t"
11148            "MOVSD  $dst,[ESP]\n\t"
11149            "ADD    ESP,8" %}
11150  opcode(0xDF, 0x5);  /* DF /5 */
11151  ins_encode(convert_long_double2(src), Push_ResultD(dst));
11152  ins_pipe( pipe_slow );
11153%}
11154
11155instruct convL2F_reg( regF dst, eRegL src, eFlagsReg cr) %{
11156  predicate (UseSSE>=1);
11157  match(Set dst (ConvL2F src));
11158  effect( KILL cr );
11159  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11160            "PUSH   $src.lo\n\t"
11161            "FILD_D [ESP]\n\t"
11162            "FSTP_S [ESP]\n\t"
11163            "MOVSS  $dst,[ESP]\n\t"
11164            "ADD    ESP,8" %}
11165  opcode(0xDF, 0x5);  /* DF /5 */
11166  ins_encode(convert_long_double2(src), Push_ResultF(dst,0x8));
11167  ins_pipe( pipe_slow );
11168%}
11169
11170instruct convL2FPR_reg( stackSlotF dst, eRegL src, eFlagsReg cr) %{
11171  match(Set dst (ConvL2F src));
11172  effect( KILL cr );
11173  format %{ "PUSH   $src.hi\t# Convert long to single float\n\t"
11174            "PUSH   $src.lo\n\t"
11175            "FILD   ST,[ESP + #0]\n\t"
11176            "ADD    ESP,8\n\t"
11177            "FSTP_S $dst\t# F-round" %}
11178  opcode(0xDF, 0x5);  /* DF /5 */
11179  ins_encode(convert_long_double(src), Pop_Mem_FPR(dst));
11180  ins_pipe( pipe_slow );
11181%}
11182
11183instruct convL2I_reg( rRegI dst, eRegL src ) %{
11184  match(Set dst (ConvL2I src));
11185  effect( DEF dst, USE src );
11186  format %{ "MOV    $dst,$src.lo" %}
11187  ins_encode(enc_CopyL_Lo(dst,src));
11188  ins_pipe( ialu_reg_reg );
11189%}
11190
11191instruct MoveF2I_stack_reg(rRegI dst, stackSlotF src) %{
11192  match(Set dst (MoveF2I src));
11193  effect( DEF dst, USE src );
11194  ins_cost(100);
11195  format %{ "MOV    $dst,$src\t# MoveF2I_stack_reg" %}
11196  ins_encode %{
11197    __ movl($dst$$Register, Address(rsp, $src$$disp));
11198  %}
11199  ins_pipe( ialu_reg_mem );
11200%}
11201
11202instruct MoveFPR2I_reg_stack(stackSlotI dst, regFPR src) %{
11203  predicate(UseSSE==0);
11204  match(Set dst (MoveF2I src));
11205  effect( DEF dst, USE src );
11206
11207  ins_cost(125);
11208  format %{ "FST_S  $dst,$src\t# MoveF2I_reg_stack" %}
11209  ins_encode( Pop_Mem_Reg_FPR(dst, src) );
11210  ins_pipe( fpu_mem_reg );
11211%}
11212
11213instruct MoveF2I_reg_stack_sse(stackSlotI dst, regF src) %{
11214  predicate(UseSSE>=1);
11215  match(Set dst (MoveF2I src));
11216  effect( DEF dst, USE src );
11217
11218  ins_cost(95);
11219  format %{ "MOVSS  $dst,$src\t# MoveF2I_reg_stack_sse" %}
11220  ins_encode %{
11221    __ movflt(Address(rsp, $dst$$disp), $src$$XMMRegister);
11222  %}
11223  ins_pipe( pipe_slow );
11224%}
11225
11226instruct MoveF2I_reg_reg_sse(rRegI dst, regF src) %{
11227  predicate(UseSSE>=2);
11228  match(Set dst (MoveF2I src));
11229  effect( DEF dst, USE src );
11230  ins_cost(85);
11231  format %{ "MOVD   $dst,$src\t# MoveF2I_reg_reg_sse" %}
11232  ins_encode %{
11233    __ movdl($dst$$Register, $src$$XMMRegister);
11234  %}
11235  ins_pipe( pipe_slow );
11236%}
11237
11238instruct MoveI2F_reg_stack(stackSlotF dst, rRegI src) %{
11239  match(Set dst (MoveI2F src));
11240  effect( DEF dst, USE src );
11241
11242  ins_cost(100);
11243  format %{ "MOV    $dst,$src\t# MoveI2F_reg_stack" %}
11244  ins_encode %{
11245    __ movl(Address(rsp, $dst$$disp), $src$$Register);
11246  %}
11247  ins_pipe( ialu_mem_reg );
11248%}
11249
11250
11251instruct MoveI2FPR_stack_reg(regFPR dst, stackSlotI src) %{
11252  predicate(UseSSE==0);
11253  match(Set dst (MoveI2F src));
11254  effect(DEF dst, USE src);
11255
11256  ins_cost(125);
11257  format %{ "FLD_S  $src\n\t"
11258            "FSTP   $dst\t# MoveI2F_stack_reg" %}
11259  opcode(0xD9);               /* D9 /0, FLD m32real */
11260  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11261              Pop_Reg_FPR(dst) );
11262  ins_pipe( fpu_reg_mem );
11263%}
11264
11265instruct MoveI2F_stack_reg_sse(regF dst, stackSlotI src) %{
11266  predicate(UseSSE>=1);
11267  match(Set dst (MoveI2F src));
11268  effect( DEF dst, USE src );
11269
11270  ins_cost(95);
11271  format %{ "MOVSS  $dst,$src\t# MoveI2F_stack_reg_sse" %}
11272  ins_encode %{
11273    __ movflt($dst$$XMMRegister, Address(rsp, $src$$disp));
11274  %}
11275  ins_pipe( pipe_slow );
11276%}
11277
11278instruct MoveI2F_reg_reg_sse(regF dst, rRegI src) %{
11279  predicate(UseSSE>=2);
11280  match(Set dst (MoveI2F src));
11281  effect( DEF dst, USE src );
11282
11283  ins_cost(85);
11284  format %{ "MOVD   $dst,$src\t# MoveI2F_reg_reg_sse" %}
11285  ins_encode %{
11286    __ movdl($dst$$XMMRegister, $src$$Register);
11287  %}
11288  ins_pipe( pipe_slow );
11289%}
11290
11291instruct MoveD2L_stack_reg(eRegL dst, stackSlotD src) %{
11292  match(Set dst (MoveD2L src));
11293  effect(DEF dst, USE src);
11294
11295  ins_cost(250);
11296  format %{ "MOV    $dst.lo,$src\n\t"
11297            "MOV    $dst.hi,$src+4\t# MoveD2L_stack_reg" %}
11298  opcode(0x8B, 0x8B);
11299  ins_encode( OpcP, RegMem(dst,src), OpcS, RegMem_Hi(dst,src));
11300  ins_pipe( ialu_mem_long_reg );
11301%}
11302
11303instruct MoveDPR2L_reg_stack(stackSlotL dst, regDPR src) %{
11304  predicate(UseSSE<=1);
11305  match(Set dst (MoveD2L src));
11306  effect(DEF dst, USE src);
11307
11308  ins_cost(125);
11309  format %{ "FST_D  $dst,$src\t# MoveD2L_reg_stack" %}
11310  ins_encode( Pop_Mem_Reg_DPR(dst, src) );
11311  ins_pipe( fpu_mem_reg );
11312%}
11313
11314instruct MoveD2L_reg_stack_sse(stackSlotL dst, regD src) %{
11315  predicate(UseSSE>=2);
11316  match(Set dst (MoveD2L src));
11317  effect(DEF dst, USE src);
11318  ins_cost(95);
11319  format %{ "MOVSD  $dst,$src\t# MoveD2L_reg_stack_sse" %}
11320  ins_encode %{
11321    __ movdbl(Address(rsp, $dst$$disp), $src$$XMMRegister);
11322  %}
11323  ins_pipe( pipe_slow );
11324%}
11325
11326instruct MoveD2L_reg_reg_sse(eRegL dst, regD src, regD tmp) %{
11327  predicate(UseSSE>=2);
11328  match(Set dst (MoveD2L src));
11329  effect(DEF dst, USE src, TEMP tmp);
11330  ins_cost(85);
11331  format %{ "MOVD   $dst.lo,$src\n\t"
11332            "PSHUFLW $tmp,$src,0x4E\n\t"
11333            "MOVD   $dst.hi,$tmp\t# MoveD2L_reg_reg_sse" %}
11334  ins_encode %{
11335    __ movdl($dst$$Register, $src$$XMMRegister);
11336    __ pshuflw($tmp$$XMMRegister, $src$$XMMRegister, 0x4e);
11337    __ movdl(HIGH_FROM_LOW($dst$$Register), $tmp$$XMMRegister);
11338  %}
11339  ins_pipe( pipe_slow );
11340%}
11341
11342instruct MoveL2D_reg_stack(stackSlotD dst, eRegL src) %{
11343  match(Set dst (MoveL2D src));
11344  effect(DEF dst, USE src);
11345
11346  ins_cost(200);
11347  format %{ "MOV    $dst,$src.lo\n\t"
11348            "MOV    $dst+4,$src.hi\t# MoveL2D_reg_stack" %}
11349  opcode(0x89, 0x89);
11350  ins_encode( OpcP, RegMem( src, dst ), OpcS, RegMem_Hi( src, dst ) );
11351  ins_pipe( ialu_mem_long_reg );
11352%}
11353
11354
11355instruct MoveL2DPR_stack_reg(regDPR dst, stackSlotL src) %{
11356  predicate(UseSSE<=1);
11357  match(Set dst (MoveL2D src));
11358  effect(DEF dst, USE src);
11359  ins_cost(125);
11360
11361  format %{ "FLD_D  $src\n\t"
11362            "FSTP   $dst\t# MoveL2D_stack_reg" %}
11363  opcode(0xDD);               /* DD /0, FLD m64real */
11364  ins_encode( OpcP, RMopc_Mem_no_oop(0x00,src),
11365              Pop_Reg_DPR(dst) );
11366  ins_pipe( fpu_reg_mem );
11367%}
11368
11369
11370instruct MoveL2D_stack_reg_sse(regD dst, stackSlotL src) %{
11371  predicate(UseSSE>=2 && UseXmmLoadAndClearUpper);
11372  match(Set dst (MoveL2D src));
11373  effect(DEF dst, USE src);
11374
11375  ins_cost(95);
11376  format %{ "MOVSD  $dst,$src\t# MoveL2D_stack_reg_sse" %}
11377  ins_encode %{
11378    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11379  %}
11380  ins_pipe( pipe_slow );
11381%}
11382
11383instruct MoveL2D_stack_reg_sse_partial(regD dst, stackSlotL src) %{
11384  predicate(UseSSE>=2 && !UseXmmLoadAndClearUpper);
11385  match(Set dst (MoveL2D src));
11386  effect(DEF dst, USE src);
11387
11388  ins_cost(95);
11389  format %{ "MOVLPD $dst,$src\t# MoveL2D_stack_reg_sse" %}
11390  ins_encode %{
11391    __ movdbl($dst$$XMMRegister, Address(rsp, $src$$disp));
11392  %}
11393  ins_pipe( pipe_slow );
11394%}
11395
11396instruct MoveL2D_reg_reg_sse(regD dst, eRegL src, regD tmp) %{
11397  predicate(UseSSE>=2);
11398  match(Set dst (MoveL2D src));
11399  effect(TEMP dst, USE src, TEMP tmp);
11400  ins_cost(85);
11401  format %{ "MOVD   $dst,$src.lo\n\t"
11402            "MOVD   $tmp,$src.hi\n\t"
11403            "PUNPCKLDQ $dst,$tmp\t# MoveL2D_reg_reg_sse" %}
11404  ins_encode %{
11405    __ movdl($dst$$XMMRegister, $src$$Register);
11406    __ movdl($tmp$$XMMRegister, HIGH_FROM_LOW($src$$Register));
11407    __ punpckldq($dst$$XMMRegister, $tmp$$XMMRegister);
11408  %}
11409  ins_pipe( pipe_slow );
11410%}
11411
11412
11413// =======================================================================
11414// fast clearing of an array
11415instruct rep_stos(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11416  predicate(!UseFastStosb);
11417  match(Set dummy (ClearArray cnt base));
11418  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11419  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11420            "SHL    ECX,1\t# Convert doublewords to words\n\t"
11421            "REP STOS\t# store EAX into [EDI++] while ECX--" %}
11422  ins_encode %{
11423    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11424  %}
11425  ins_pipe( pipe_slow );
11426%}
11427
11428instruct rep_fast_stosb(eCXRegI cnt, eDIRegP base, eAXRegI zero, Universe dummy, eFlagsReg cr) %{
11429  predicate(UseFastStosb);
11430  match(Set dummy (ClearArray cnt base));
11431  effect(USE_KILL cnt, USE_KILL base, KILL zero, KILL cr);
11432  format %{ "XOR    EAX,EAX\t# ClearArray:\n\t"
11433            "SHL    ECX,3\t# Convert doublewords to bytes\n\t"
11434            "REP STOSB\t# store EAX into [EDI++] while ECX--" %}
11435  ins_encode %{
11436    __ clear_mem($base$$Register, $cnt$$Register, $zero$$Register);
11437  %}
11438  ins_pipe( pipe_slow );
11439%}
11440
11441instruct string_compareL(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11442                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11443  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LL);
11444  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11445  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11446
11447  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11448  ins_encode %{
11449    __ string_compare($str1$$Register, $str2$$Register,
11450                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11451                      $tmp1$$XMMRegister, StrIntrinsicNode::LL);
11452  %}
11453  ins_pipe( pipe_slow );
11454%}
11455
11456instruct string_compareU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11457                         eAXRegI result, regD tmp1, eFlagsReg cr) %{
11458  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UU);
11459  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11460  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11461
11462  format %{ "String Compare char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11463  ins_encode %{
11464    __ string_compare($str1$$Register, $str2$$Register,
11465                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11466                      $tmp1$$XMMRegister, StrIntrinsicNode::UU);
11467  %}
11468  ins_pipe( pipe_slow );
11469%}
11470
11471instruct string_compareLU(eDIRegP str1, eCXRegI cnt1, eSIRegP str2, eDXRegI cnt2,
11472                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11473  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::LU);
11474  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11475  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11476
11477  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11478  ins_encode %{
11479    __ string_compare($str1$$Register, $str2$$Register,
11480                      $cnt1$$Register, $cnt2$$Register, $result$$Register,
11481                      $tmp1$$XMMRegister, StrIntrinsicNode::LU);
11482  %}
11483  ins_pipe( pipe_slow );
11484%}
11485
11486instruct string_compareUL(eSIRegP str1, eDXRegI cnt1, eDIRegP str2, eCXRegI cnt2,
11487                          eAXRegI result, regD tmp1, eFlagsReg cr) %{
11488  predicate(((StrCompNode*)n)->encoding() == StrIntrinsicNode::UL);
11489  match(Set result (StrComp (Binary str1 cnt1) (Binary str2 cnt2)));
11490  effect(TEMP tmp1, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL cr);
11491
11492  format %{ "String Compare byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL $tmp1" %}
11493  ins_encode %{
11494    __ string_compare($str2$$Register, $str1$$Register,
11495                      $cnt2$$Register, $cnt1$$Register, $result$$Register,
11496                      $tmp1$$XMMRegister, StrIntrinsicNode::UL);
11497  %}
11498  ins_pipe( pipe_slow );
11499%}
11500
11501// fast string equals
11502instruct string_equals(eDIRegP str1, eSIRegP str2, eCXRegI cnt, eAXRegI result,
11503                       regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr) %{
11504  match(Set result (StrEquals (Binary str1 str2) cnt));
11505  effect(TEMP tmp1, TEMP tmp2, USE_KILL str1, USE_KILL str2, USE_KILL cnt, KILL tmp3, KILL cr);
11506
11507  format %{ "String Equals $str1,$str2,$cnt -> $result    // KILL $tmp1, $tmp2, $tmp3" %}
11508  ins_encode %{
11509    __ arrays_equals(false, $str1$$Register, $str2$$Register,
11510                     $cnt$$Register, $result$$Register, $tmp3$$Register,
11511                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11512  %}
11513
11514  ins_pipe( pipe_slow );
11515%}
11516
11517// fast search of substring with known size.
11518instruct string_indexof_conL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11519                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11520  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11521  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11522  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11523
11524  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11525  ins_encode %{
11526    int icnt2 = (int)$int_cnt2$$constant;
11527    if (icnt2 >= 16) {
11528      // IndexOf for constant substrings with size >= 16 elements
11529      // which don't need to be loaded through stack.
11530      __ string_indexofC8($str1$$Register, $str2$$Register,
11531                          $cnt1$$Register, $cnt2$$Register,
11532                          icnt2, $result$$Register,
11533                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11534    } else {
11535      // Small strings are loaded through stack if they cross page boundary.
11536      __ string_indexof($str1$$Register, $str2$$Register,
11537                        $cnt1$$Register, $cnt2$$Register,
11538                        icnt2, $result$$Register,
11539                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11540    }
11541  %}
11542  ins_pipe( pipe_slow );
11543%}
11544
11545// fast search of substring with known size.
11546instruct string_indexof_conU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11547                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11548  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11549  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11550  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11551
11552  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11553  ins_encode %{
11554    int icnt2 = (int)$int_cnt2$$constant;
11555    if (icnt2 >= 8) {
11556      // IndexOf for constant substrings with size >= 8 elements
11557      // which don't need to be loaded through stack.
11558      __ string_indexofC8($str1$$Register, $str2$$Register,
11559                          $cnt1$$Register, $cnt2$$Register,
11560                          icnt2, $result$$Register,
11561                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11562    } else {
11563      // Small strings are loaded through stack if they cross page boundary.
11564      __ string_indexof($str1$$Register, $str2$$Register,
11565                        $cnt1$$Register, $cnt2$$Register,
11566                        icnt2, $result$$Register,
11567                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11568    }
11569  %}
11570  ins_pipe( pipe_slow );
11571%}
11572
11573// fast search of substring with known size.
11574instruct string_indexof_conUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, immI int_cnt2,
11575                             eBXRegI result, regD vec, eAXRegI cnt2, eCXRegI tmp, eFlagsReg cr) %{
11576  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11577  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 int_cnt2)));
11578  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, KILL cnt2, KILL tmp, KILL cr);
11579
11580  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$int_cnt2 -> $result   // KILL $vec, $cnt1, $cnt2, $tmp" %}
11581  ins_encode %{
11582    int icnt2 = (int)$int_cnt2$$constant;
11583    if (icnt2 >= 8) {
11584      // IndexOf for constant substrings with size >= 8 elements
11585      // which don't need to be loaded through stack.
11586      __ string_indexofC8($str1$$Register, $str2$$Register,
11587                          $cnt1$$Register, $cnt2$$Register,
11588                          icnt2, $result$$Register,
11589                          $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11590    } else {
11591      // Small strings are loaded through stack if they cross page boundary.
11592      __ string_indexof($str1$$Register, $str2$$Register,
11593                        $cnt1$$Register, $cnt2$$Register,
11594                        icnt2, $result$$Register,
11595                        $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11596    }
11597  %}
11598  ins_pipe( pipe_slow );
11599%}
11600
11601instruct string_indexofL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11602                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11603  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::LL));
11604  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11605  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11606
11607  format %{ "String IndexOf byte[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11608  ins_encode %{
11609    __ string_indexof($str1$$Register, $str2$$Register,
11610                      $cnt1$$Register, $cnt2$$Register,
11611                      (-1), $result$$Register,
11612                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::LL);
11613  %}
11614  ins_pipe( pipe_slow );
11615%}
11616
11617instruct string_indexofU(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11618                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11619  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UU));
11620  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11621  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11622
11623  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11624  ins_encode %{
11625    __ string_indexof($str1$$Register, $str2$$Register,
11626                      $cnt1$$Register, $cnt2$$Register,
11627                      (-1), $result$$Register,
11628                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UU);
11629  %}
11630  ins_pipe( pipe_slow );
11631%}
11632
11633instruct string_indexofUL(eDIRegP str1, eDXRegI cnt1, eSIRegP str2, eAXRegI cnt2,
11634                         eBXRegI result, regD vec, eCXRegI tmp, eFlagsReg cr) %{
11635  predicate(UseSSE42Intrinsics && (((StrIndexOfNode*)n)->encoding() == StrIntrinsicNode::UL));
11636  match(Set result (StrIndexOf (Binary str1 cnt1) (Binary str2 cnt2)));
11637  effect(TEMP vec, USE_KILL str1, USE_KILL str2, USE_KILL cnt1, USE_KILL cnt2, KILL tmp, KILL cr);
11638
11639  format %{ "String IndexOf char[] $str1,$cnt1,$str2,$cnt2 -> $result   // KILL all" %}
11640  ins_encode %{
11641    __ string_indexof($str1$$Register, $str2$$Register,
11642                      $cnt1$$Register, $cnt2$$Register,
11643                      (-1), $result$$Register,
11644                      $vec$$XMMRegister, $tmp$$Register, StrIntrinsicNode::UL);
11645  %}
11646  ins_pipe( pipe_slow );
11647%}
11648
11649instruct string_indexofU_char(eDIRegP str1, eDXRegI cnt1, eAXRegI ch,
11650                              eBXRegI result, regD vec1, regD vec2, regD vec3, eCXRegI tmp, eFlagsReg cr) %{
11651  predicate(UseSSE42Intrinsics);
11652  match(Set result (StrIndexOfChar (Binary str1 cnt1) ch));
11653  effect(TEMP vec1, TEMP vec2, TEMP vec3, USE_KILL str1, USE_KILL cnt1, USE_KILL ch, TEMP tmp, KILL cr);
11654  format %{ "String IndexOf char[] $str1,$cnt1,$ch -> $result   // KILL all" %}
11655  ins_encode %{
11656    __ string_indexof_char($str1$$Register, $cnt1$$Register, $ch$$Register, $result$$Register,
11657                           $vec1$$XMMRegister, $vec2$$XMMRegister, $vec3$$XMMRegister, $tmp$$Register);
11658  %}
11659  ins_pipe( pipe_slow );
11660%}
11661
11662// fast array equals
11663instruct array_equalsB(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11664                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11665%{
11666  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::LL);
11667  match(Set result (AryEq ary1 ary2));
11668  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11669  //ins_cost(300);
11670
11671  format %{ "Array Equals byte[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11672  ins_encode %{
11673    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11674                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11675                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, false /* char */);
11676  %}
11677  ins_pipe( pipe_slow );
11678%}
11679
11680instruct array_equalsC(eDIRegP ary1, eSIRegP ary2, eAXRegI result,
11681                       regD tmp1, regD tmp2, eCXRegI tmp3, eBXRegI tmp4, eFlagsReg cr)
11682%{
11683  predicate(((AryEqNode*)n)->encoding() == StrIntrinsicNode::UU);
11684  match(Set result (AryEq ary1 ary2));
11685  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL ary2, KILL tmp3, KILL tmp4, KILL cr);
11686  //ins_cost(300);
11687
11688  format %{ "Array Equals char[] $ary1,$ary2 -> $result   // KILL $tmp1, $tmp2, $tmp3, $tmp4" %}
11689  ins_encode %{
11690    __ arrays_equals(true, $ary1$$Register, $ary2$$Register,
11691                     $tmp3$$Register, $result$$Register, $tmp4$$Register,
11692                     $tmp1$$XMMRegister, $tmp2$$XMMRegister, true /* char */);
11693  %}
11694  ins_pipe( pipe_slow );
11695%}
11696
11697instruct has_negatives(eSIRegP ary1, eCXRegI len, eAXRegI result,
11698                      regD tmp1, regD tmp2, eBXRegI tmp3, eFlagsReg cr)
11699%{
11700  match(Set result (HasNegatives ary1 len));
11701  effect(TEMP tmp1, TEMP tmp2, USE_KILL ary1, USE_KILL len, KILL tmp3, KILL cr);
11702
11703  format %{ "has negatives byte[] $ary1,$len -> $result   // KILL $tmp1, $tmp2, $tmp3" %}
11704  ins_encode %{
11705    __ has_negatives($ary1$$Register, $len$$Register,
11706                     $result$$Register, $tmp3$$Register,
11707                     $tmp1$$XMMRegister, $tmp2$$XMMRegister);
11708  %}
11709  ins_pipe( pipe_slow );
11710%}
11711
11712// fast char[] to byte[] compression
11713instruct string_compress(eSIRegP src, eDIRegP dst, eDXRegI len, regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11714                         eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11715  match(Set result (StrCompressedCopy src (Binary dst len)));
11716  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11717
11718  format %{ "String Compress $src,$dst -> $result    // KILL RAX, RCX, RDX" %}
11719  ins_encode %{
11720    __ char_array_compress($src$$Register, $dst$$Register, $len$$Register,
11721                           $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11722                           $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11723  %}
11724  ins_pipe( pipe_slow );
11725%}
11726
11727// fast byte[] to char[] inflation
11728instruct string_inflate(Universe dummy, eSIRegP src, eDIRegP dst, eDXRegI len,
11729                        regD tmp1, eCXRegI tmp2, eFlagsReg cr) %{
11730  match(Set dummy (StrInflatedCopy src (Binary dst len)));
11731  effect(TEMP tmp1, TEMP tmp2, USE_KILL src, USE_KILL dst, USE_KILL len, KILL cr);
11732
11733  format %{ "String Inflate $src,$dst    // KILL $tmp1, $tmp2" %}
11734  ins_encode %{
11735    __ byte_array_inflate($src$$Register, $dst$$Register, $len$$Register,
11736                          $tmp1$$XMMRegister, $tmp2$$Register);
11737  %}
11738  ins_pipe( pipe_slow );
11739%}
11740
11741// encode char[] to byte[] in ISO_8859_1
11742instruct encode_iso_array(eSIRegP src, eDIRegP dst, eDXRegI len,
11743                          regD tmp1, regD tmp2, regD tmp3, regD tmp4,
11744                          eCXRegI tmp5, eAXRegI result, eFlagsReg cr) %{
11745  match(Set result (EncodeISOArray src (Binary dst len)));
11746  effect(TEMP tmp1, TEMP tmp2, TEMP tmp3, TEMP tmp4, USE_KILL src, USE_KILL dst, USE_KILL len, KILL tmp5, KILL cr);
11747
11748  format %{ "Encode array $src,$dst,$len -> $result    // KILL ECX, EDX, $tmp1, $tmp2, $tmp3, $tmp4, ESI, EDI " %}
11749  ins_encode %{
11750    __ encode_iso_array($src$$Register, $dst$$Register, $len$$Register,
11751                        $tmp1$$XMMRegister, $tmp2$$XMMRegister, $tmp3$$XMMRegister,
11752                        $tmp4$$XMMRegister, $tmp5$$Register, $result$$Register);
11753  %}
11754  ins_pipe( pipe_slow );
11755%}
11756
11757
11758//----------Control Flow Instructions------------------------------------------
11759// Signed compare Instructions
11760instruct compI_eReg(eFlagsReg cr, rRegI op1, rRegI op2) %{
11761  match(Set cr (CmpI op1 op2));
11762  effect( DEF cr, USE op1, USE op2 );
11763  format %{ "CMP    $op1,$op2" %}
11764  opcode(0x3B);  /* Opcode 3B /r */
11765  ins_encode( OpcP, RegReg( op1, op2) );
11766  ins_pipe( ialu_cr_reg_reg );
11767%}
11768
11769instruct compI_eReg_imm(eFlagsReg cr, rRegI op1, immI op2) %{
11770  match(Set cr (CmpI op1 op2));
11771  effect( DEF cr, USE op1 );
11772  format %{ "CMP    $op1,$op2" %}
11773  opcode(0x81,0x07);  /* Opcode 81 /7 */
11774  // ins_encode( RegImm( op1, op2) );  /* Was CmpImm */
11775  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11776  ins_pipe( ialu_cr_reg_imm );
11777%}
11778
11779// Cisc-spilled version of cmpI_eReg
11780instruct compI_eReg_mem(eFlagsReg cr, rRegI op1, memory op2) %{
11781  match(Set cr (CmpI op1 (LoadI op2)));
11782
11783  format %{ "CMP    $op1,$op2" %}
11784  ins_cost(500);
11785  opcode(0x3B);  /* Opcode 3B /r */
11786  ins_encode( OpcP, RegMem( op1, op2) );
11787  ins_pipe( ialu_cr_reg_mem );
11788%}
11789
11790instruct testI_reg( eFlagsReg cr, rRegI src, immI0 zero ) %{
11791  match(Set cr (CmpI src zero));
11792  effect( DEF cr, USE src );
11793
11794  format %{ "TEST   $src,$src" %}
11795  opcode(0x85);
11796  ins_encode( OpcP, RegReg( src, src ) );
11797  ins_pipe( ialu_cr_reg_imm );
11798%}
11799
11800instruct testI_reg_imm( eFlagsReg cr, rRegI src, immI con, immI0 zero ) %{
11801  match(Set cr (CmpI (AndI src con) zero));
11802
11803  format %{ "TEST   $src,$con" %}
11804  opcode(0xF7,0x00);
11805  ins_encode( OpcP, RegOpc(src), Con32(con) );
11806  ins_pipe( ialu_cr_reg_imm );
11807%}
11808
11809instruct testI_reg_mem( eFlagsReg cr, rRegI src, memory mem, immI0 zero ) %{
11810  match(Set cr (CmpI (AndI src mem) zero));
11811
11812  format %{ "TEST   $src,$mem" %}
11813  opcode(0x85);
11814  ins_encode( OpcP, RegMem( src, mem ) );
11815  ins_pipe( ialu_cr_reg_mem );
11816%}
11817
11818// Unsigned compare Instructions; really, same as signed except they
11819// produce an eFlagsRegU instead of eFlagsReg.
11820instruct compU_eReg(eFlagsRegU cr, rRegI op1, rRegI op2) %{
11821  match(Set cr (CmpU op1 op2));
11822
11823  format %{ "CMPu   $op1,$op2" %}
11824  opcode(0x3B);  /* Opcode 3B /r */
11825  ins_encode( OpcP, RegReg( op1, op2) );
11826  ins_pipe( ialu_cr_reg_reg );
11827%}
11828
11829instruct compU_eReg_imm(eFlagsRegU cr, rRegI op1, immI op2) %{
11830  match(Set cr (CmpU op1 op2));
11831
11832  format %{ "CMPu   $op1,$op2" %}
11833  opcode(0x81,0x07);  /* Opcode 81 /7 */
11834  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11835  ins_pipe( ialu_cr_reg_imm );
11836%}
11837
11838// // Cisc-spilled version of cmpU_eReg
11839instruct compU_eReg_mem(eFlagsRegU cr, rRegI op1, memory op2) %{
11840  match(Set cr (CmpU op1 (LoadI op2)));
11841
11842  format %{ "CMPu   $op1,$op2" %}
11843  ins_cost(500);
11844  opcode(0x3B);  /* Opcode 3B /r */
11845  ins_encode( OpcP, RegMem( op1, op2) );
11846  ins_pipe( ialu_cr_reg_mem );
11847%}
11848
11849// // Cisc-spilled version of cmpU_eReg
11850//instruct compU_mem_eReg(eFlagsRegU cr, memory op1, rRegI op2) %{
11851//  match(Set cr (CmpU (LoadI op1) op2));
11852//
11853//  format %{ "CMPu   $op1,$op2" %}
11854//  ins_cost(500);
11855//  opcode(0x39);  /* Opcode 39 /r */
11856//  ins_encode( OpcP, RegMem( op1, op2) );
11857//%}
11858
11859instruct testU_reg( eFlagsRegU cr, rRegI src, immI0 zero ) %{
11860  match(Set cr (CmpU src zero));
11861
11862  format %{ "TESTu  $src,$src" %}
11863  opcode(0x85);
11864  ins_encode( OpcP, RegReg( src, src ) );
11865  ins_pipe( ialu_cr_reg_imm );
11866%}
11867
11868// Unsigned pointer compare Instructions
11869instruct compP_eReg(eFlagsRegU cr, eRegP op1, eRegP op2) %{
11870  match(Set cr (CmpP op1 op2));
11871
11872  format %{ "CMPu   $op1,$op2" %}
11873  opcode(0x3B);  /* Opcode 3B /r */
11874  ins_encode( OpcP, RegReg( op1, op2) );
11875  ins_pipe( ialu_cr_reg_reg );
11876%}
11877
11878instruct compP_eReg_imm(eFlagsRegU cr, eRegP op1, immP op2) %{
11879  match(Set cr (CmpP op1 op2));
11880
11881  format %{ "CMPu   $op1,$op2" %}
11882  opcode(0x81,0x07);  /* Opcode 81 /7 */
11883  ins_encode( OpcSErm( op1, op2 ), Con8or32( op2 ) );
11884  ins_pipe( ialu_cr_reg_imm );
11885%}
11886
11887// // Cisc-spilled version of cmpP_eReg
11888instruct compP_eReg_mem(eFlagsRegU cr, eRegP op1, memory op2) %{
11889  match(Set cr (CmpP op1 (LoadP op2)));
11890
11891  format %{ "CMPu   $op1,$op2" %}
11892  ins_cost(500);
11893  opcode(0x3B);  /* Opcode 3B /r */
11894  ins_encode( OpcP, RegMem( op1, op2) );
11895  ins_pipe( ialu_cr_reg_mem );
11896%}
11897
11898// // Cisc-spilled version of cmpP_eReg
11899//instruct compP_mem_eReg(eFlagsRegU cr, memory op1, eRegP op2) %{
11900//  match(Set cr (CmpP (LoadP op1) op2));
11901//
11902//  format %{ "CMPu   $op1,$op2" %}
11903//  ins_cost(500);
11904//  opcode(0x39);  /* Opcode 39 /r */
11905//  ins_encode( OpcP, RegMem( op1, op2) );
11906//%}
11907
11908// Compare raw pointer (used in out-of-heap check).
11909// Only works because non-oop pointers must be raw pointers
11910// and raw pointers have no anti-dependencies.
11911instruct compP_mem_eReg( eFlagsRegU cr, eRegP op1, memory op2 ) %{
11912  predicate( n->in(2)->in(2)->bottom_type()->reloc() == relocInfo::none );
11913  match(Set cr (CmpP op1 (LoadP op2)));
11914
11915  format %{ "CMPu   $op1,$op2" %}
11916  opcode(0x3B);  /* Opcode 3B /r */
11917  ins_encode( OpcP, RegMem( op1, op2) );
11918  ins_pipe( ialu_cr_reg_mem );
11919%}
11920
11921//
11922// This will generate a signed flags result. This should be ok
11923// since any compare to a zero should be eq/neq.
11924instruct testP_reg( eFlagsReg cr, eRegP src, immP0 zero ) %{
11925  match(Set cr (CmpP src zero));
11926
11927  format %{ "TEST   $src,$src" %}
11928  opcode(0x85);
11929  ins_encode( OpcP, RegReg( src, src ) );
11930  ins_pipe( ialu_cr_reg_imm );
11931%}
11932
11933// Cisc-spilled version of testP_reg
11934// This will generate a signed flags result. This should be ok
11935// since any compare to a zero should be eq/neq.
11936instruct testP_Reg_mem( eFlagsReg cr, memory op, immI0 zero ) %{
11937  match(Set cr (CmpP (LoadP op) zero));
11938
11939  format %{ "TEST   $op,0xFFFFFFFF" %}
11940  ins_cost(500);
11941  opcode(0xF7);               /* Opcode F7 /0 */
11942  ins_encode( OpcP, RMopc_Mem(0x00,op), Con_d32(0xFFFFFFFF) );
11943  ins_pipe( ialu_cr_reg_imm );
11944%}
11945
11946// Yanked all unsigned pointer compare operations.
11947// Pointer compares are done with CmpP which is already unsigned.
11948
11949//----------Max and Min--------------------------------------------------------
11950// Min Instructions
11951////
11952//   *** Min and Max using the conditional move are slower than the
11953//   *** branch version on a Pentium III.
11954// // Conditional move for min
11955//instruct cmovI_reg_lt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11956//  effect( USE_DEF op2, USE op1, USE cr );
11957//  format %{ "CMOVlt $op2,$op1\t! min" %}
11958//  opcode(0x4C,0x0F);
11959//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11960//  ins_pipe( pipe_cmov_reg );
11961//%}
11962//
11963//// Min Register with Register (P6 version)
11964//instruct minI_eReg_p6( rRegI op1, rRegI op2 ) %{
11965//  predicate(VM_Version::supports_cmov() );
11966//  match(Set op2 (MinI op1 op2));
11967//  ins_cost(200);
11968//  expand %{
11969//    eFlagsReg cr;
11970//    compI_eReg(cr,op1,op2);
11971//    cmovI_reg_lt(op2,op1,cr);
11972//  %}
11973//%}
11974
11975// Min Register with Register (generic version)
11976instruct minI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
11977  match(Set dst (MinI dst src));
11978  effect(KILL flags);
11979  ins_cost(300);
11980
11981  format %{ "MIN    $dst,$src" %}
11982  opcode(0xCC);
11983  ins_encode( min_enc(dst,src) );
11984  ins_pipe( pipe_slow );
11985%}
11986
11987// Max Register with Register
11988//   *** Min and Max using the conditional move are slower than the
11989//   *** branch version on a Pentium III.
11990// // Conditional move for max
11991//instruct cmovI_reg_gt( rRegI op2, rRegI op1, eFlagsReg cr ) %{
11992//  effect( USE_DEF op2, USE op1, USE cr );
11993//  format %{ "CMOVgt $op2,$op1\t! max" %}
11994//  opcode(0x4F,0x0F);
11995//  ins_encode( OpcS, OpcP, RegReg( op2, op1 ) );
11996//  ins_pipe( pipe_cmov_reg );
11997//%}
11998//
11999// // Max Register with Register (P6 version)
12000//instruct maxI_eReg_p6( rRegI op1, rRegI op2 ) %{
12001//  predicate(VM_Version::supports_cmov() );
12002//  match(Set op2 (MaxI op1 op2));
12003//  ins_cost(200);
12004//  expand %{
12005//    eFlagsReg cr;
12006//    compI_eReg(cr,op1,op2);
12007//    cmovI_reg_gt(op2,op1,cr);
12008//  %}
12009//%}
12010
12011// Max Register with Register (generic version)
12012instruct maxI_eReg(rRegI dst, rRegI src, eFlagsReg flags) %{
12013  match(Set dst (MaxI dst src));
12014  effect(KILL flags);
12015  ins_cost(300);
12016
12017  format %{ "MAX    $dst,$src" %}
12018  opcode(0xCC);
12019  ins_encode( max_enc(dst,src) );
12020  ins_pipe( pipe_slow );
12021%}
12022
12023// ============================================================================
12024// Counted Loop limit node which represents exact final iterator value.
12025// Note: the resulting value should fit into integer range since
12026// counted loops have limit check on overflow.
12027instruct loopLimit_eReg(eAXRegI limit, nadxRegI init, immI stride, eDXRegI limit_hi, nadxRegI tmp, eFlagsReg flags) %{
12028  match(Set limit (LoopLimit (Binary init limit) stride));
12029  effect(TEMP limit_hi, TEMP tmp, KILL flags);
12030  ins_cost(300);
12031
12032  format %{ "loopLimit $init,$limit,$stride  # $limit = $init + $stride *( $limit - $init + $stride -1)/ $stride, kills $limit_hi" %}
12033  ins_encode %{
12034    int strd = (int)$stride$$constant;
12035    assert(strd != 1 && strd != -1, "sanity");
12036    int m1 = (strd > 0) ? 1 : -1;
12037    // Convert limit to long (EAX:EDX)
12038    __ cdql();
12039    // Convert init to long (init:tmp)
12040    __ movl($tmp$$Register, $init$$Register);
12041    __ sarl($tmp$$Register, 31);
12042    // $limit - $init
12043    __ subl($limit$$Register, $init$$Register);
12044    __ sbbl($limit_hi$$Register, $tmp$$Register);
12045    // + ($stride - 1)
12046    if (strd > 0) {
12047      __ addl($limit$$Register, (strd - 1));
12048      __ adcl($limit_hi$$Register, 0);
12049      __ movl($tmp$$Register, strd);
12050    } else {
12051      __ addl($limit$$Register, (strd + 1));
12052      __ adcl($limit_hi$$Register, -1);
12053      __ lneg($limit_hi$$Register, $limit$$Register);
12054      __ movl($tmp$$Register, -strd);
12055    }
12056    // signed devision: (EAX:EDX) / pos_stride
12057    __ idivl($tmp$$Register);
12058    if (strd < 0) {
12059      // restore sign
12060      __ negl($tmp$$Register);
12061    }
12062    // (EAX) * stride
12063    __ mull($tmp$$Register);
12064    // + init (ignore upper bits)
12065    __ addl($limit$$Register, $init$$Register);
12066  %}
12067  ins_pipe( pipe_slow );
12068%}
12069
12070// ============================================================================
12071// Branch Instructions
12072// Jump Table
12073instruct jumpXtnd(rRegI switch_val) %{
12074  match(Jump switch_val);
12075  ins_cost(350);
12076  format %{  "JMP    [$constantaddress](,$switch_val,1)\n\t" %}
12077  ins_encode %{
12078    // Jump to Address(table_base + switch_reg)
12079    Address index(noreg, $switch_val$$Register, Address::times_1);
12080    __ jump(ArrayAddress($constantaddress, index));
12081  %}
12082  ins_pipe(pipe_jmp);
12083%}
12084
12085// Jump Direct - Label defines a relative address from JMP+1
12086instruct jmpDir(label labl) %{
12087  match(Goto);
12088  effect(USE labl);
12089
12090  ins_cost(300);
12091  format %{ "JMP    $labl" %}
12092  size(5);
12093  ins_encode %{
12094    Label* L = $labl$$label;
12095    __ jmp(*L, false); // Always long jump
12096  %}
12097  ins_pipe( pipe_jmp );
12098%}
12099
12100// Jump Direct Conditional - Label defines a relative address from Jcc+1
12101instruct jmpCon(cmpOp cop, eFlagsReg cr, label labl) %{
12102  match(If cop cr);
12103  effect(USE labl);
12104
12105  ins_cost(300);
12106  format %{ "J$cop    $labl" %}
12107  size(6);
12108  ins_encode %{
12109    Label* L = $labl$$label;
12110    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12111  %}
12112  ins_pipe( pipe_jcc );
12113%}
12114
12115// Jump Direct Conditional - Label defines a relative address from Jcc+1
12116instruct jmpLoopEnd(cmpOp cop, eFlagsReg cr, label labl) %{
12117  match(CountedLoopEnd cop cr);
12118  effect(USE labl);
12119
12120  ins_cost(300);
12121  format %{ "J$cop    $labl\t# Loop end" %}
12122  size(6);
12123  ins_encode %{
12124    Label* L = $labl$$label;
12125    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12126  %}
12127  ins_pipe( pipe_jcc );
12128%}
12129
12130// Jump Direct Conditional - Label defines a relative address from Jcc+1
12131instruct jmpLoopEndU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12132  match(CountedLoopEnd cop cmp);
12133  effect(USE labl);
12134
12135  ins_cost(300);
12136  format %{ "J$cop,u  $labl\t# Loop end" %}
12137  size(6);
12138  ins_encode %{
12139    Label* L = $labl$$label;
12140    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12141  %}
12142  ins_pipe( pipe_jcc );
12143%}
12144
12145instruct jmpLoopEndUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12146  match(CountedLoopEnd cop cmp);
12147  effect(USE labl);
12148
12149  ins_cost(200);
12150  format %{ "J$cop,u  $labl\t# Loop end" %}
12151  size(6);
12152  ins_encode %{
12153    Label* L = $labl$$label;
12154    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12155  %}
12156  ins_pipe( pipe_jcc );
12157%}
12158
12159// Jump Direct Conditional - using unsigned comparison
12160instruct jmpConU(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12161  match(If cop cmp);
12162  effect(USE labl);
12163
12164  ins_cost(300);
12165  format %{ "J$cop,u  $labl" %}
12166  size(6);
12167  ins_encode %{
12168    Label* L = $labl$$label;
12169    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12170  %}
12171  ins_pipe(pipe_jcc);
12172%}
12173
12174instruct jmpConUCF(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12175  match(If cop cmp);
12176  effect(USE labl);
12177
12178  ins_cost(200);
12179  format %{ "J$cop,u  $labl" %}
12180  size(6);
12181  ins_encode %{
12182    Label* L = $labl$$label;
12183    __ jcc((Assembler::Condition)($cop$$cmpcode), *L, false); // Always long jump
12184  %}
12185  ins_pipe(pipe_jcc);
12186%}
12187
12188instruct jmpConUCF2(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12189  match(If cop cmp);
12190  effect(USE labl);
12191
12192  ins_cost(200);
12193  format %{ $$template
12194    if ($cop$$cmpcode == Assembler::notEqual) {
12195      $$emit$$"JP,u   $labl\n\t"
12196      $$emit$$"J$cop,u   $labl"
12197    } else {
12198      $$emit$$"JP,u   done\n\t"
12199      $$emit$$"J$cop,u   $labl\n\t"
12200      $$emit$$"done:"
12201    }
12202  %}
12203  ins_encode %{
12204    Label* l = $labl$$label;
12205    if ($cop$$cmpcode == Assembler::notEqual) {
12206      __ jcc(Assembler::parity, *l, false);
12207      __ jcc(Assembler::notEqual, *l, false);
12208    } else if ($cop$$cmpcode == Assembler::equal) {
12209      Label done;
12210      __ jccb(Assembler::parity, done);
12211      __ jcc(Assembler::equal, *l, false);
12212      __ bind(done);
12213    } else {
12214       ShouldNotReachHere();
12215    }
12216  %}
12217  ins_pipe(pipe_jcc);
12218%}
12219
12220// ============================================================================
12221// The 2nd slow-half of a subtype check.  Scan the subklass's 2ndary superklass
12222// array for an instance of the superklass.  Set a hidden internal cache on a
12223// hit (cache is checked with exposed code in gen_subtype_check()).  Return
12224// NZ for a miss or zero for a hit.  The encoding ALSO sets flags.
12225instruct partialSubtypeCheck( eDIRegP result, eSIRegP sub, eAXRegP super, eCXRegI rcx, eFlagsReg cr ) %{
12226  match(Set result (PartialSubtypeCheck sub super));
12227  effect( KILL rcx, KILL cr );
12228
12229  ins_cost(1100);  // slightly larger than the next version
12230  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12231            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12232            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12233            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12234            "JNE,s  miss\t\t# Missed: EDI not-zero\n\t"
12235            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache\n\t"
12236            "XOR    $result,$result\t\t Hit: EDI zero\n\t"
12237     "miss:\t" %}
12238
12239  opcode(0x1); // Force a XOR of EDI
12240  ins_encode( enc_PartialSubtypeCheck() );
12241  ins_pipe( pipe_slow );
12242%}
12243
12244instruct partialSubtypeCheck_vs_Zero( eFlagsReg cr, eSIRegP sub, eAXRegP super, eCXRegI rcx, eDIRegP result, immP0 zero ) %{
12245  match(Set cr (CmpP (PartialSubtypeCheck sub super) zero));
12246  effect( KILL rcx, KILL result );
12247
12248  ins_cost(1000);
12249  format %{ "MOV    EDI,[$sub+Klass::secondary_supers]\n\t"
12250            "MOV    ECX,[EDI+ArrayKlass::length]\t# length to scan\n\t"
12251            "ADD    EDI,ArrayKlass::base_offset\t# Skip to start of data; set NZ in case count is zero\n\t"
12252            "REPNE SCASD\t# Scan *EDI++ for a match with EAX while CX-- != 0\n\t"
12253            "JNE,s  miss\t\t# Missed: flags NZ\n\t"
12254            "MOV    [$sub+Klass::secondary_super_cache],$super\t# Hit: update cache, flags Z\n\t"
12255     "miss:\t" %}
12256
12257  opcode(0x0);  // No need to XOR EDI
12258  ins_encode( enc_PartialSubtypeCheck() );
12259  ins_pipe( pipe_slow );
12260%}
12261
12262// ============================================================================
12263// Branch Instructions -- short offset versions
12264//
12265// These instructions are used to replace jumps of a long offset (the default
12266// match) with jumps of a shorter offset.  These instructions are all tagged
12267// with the ins_short_branch attribute, which causes the ADLC to suppress the
12268// match rules in general matching.  Instead, the ADLC generates a conversion
12269// method in the MachNode which can be used to do in-place replacement of the
12270// long variant with the shorter variant.  The compiler will determine if a
12271// branch can be taken by the is_short_branch_offset() predicate in the machine
12272// specific code section of the file.
12273
12274// Jump Direct - Label defines a relative address from JMP+1
12275instruct jmpDir_short(label labl) %{
12276  match(Goto);
12277  effect(USE labl);
12278
12279  ins_cost(300);
12280  format %{ "JMP,s  $labl" %}
12281  size(2);
12282  ins_encode %{
12283    Label* L = $labl$$label;
12284    __ jmpb(*L);
12285  %}
12286  ins_pipe( pipe_jmp );
12287  ins_short_branch(1);
12288%}
12289
12290// Jump Direct Conditional - Label defines a relative address from Jcc+1
12291instruct jmpCon_short(cmpOp cop, eFlagsReg cr, label labl) %{
12292  match(If cop cr);
12293  effect(USE labl);
12294
12295  ins_cost(300);
12296  format %{ "J$cop,s  $labl" %}
12297  size(2);
12298  ins_encode %{
12299    Label* L = $labl$$label;
12300    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12301  %}
12302  ins_pipe( pipe_jcc );
12303  ins_short_branch(1);
12304%}
12305
12306// Jump Direct Conditional - Label defines a relative address from Jcc+1
12307instruct jmpLoopEnd_short(cmpOp cop, eFlagsReg cr, label labl) %{
12308  match(CountedLoopEnd cop cr);
12309  effect(USE labl);
12310
12311  ins_cost(300);
12312  format %{ "J$cop,s  $labl\t# Loop end" %}
12313  size(2);
12314  ins_encode %{
12315    Label* L = $labl$$label;
12316    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12317  %}
12318  ins_pipe( pipe_jcc );
12319  ins_short_branch(1);
12320%}
12321
12322// Jump Direct Conditional - Label defines a relative address from Jcc+1
12323instruct jmpLoopEndU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12324  match(CountedLoopEnd cop cmp);
12325  effect(USE labl);
12326
12327  ins_cost(300);
12328  format %{ "J$cop,us $labl\t# Loop end" %}
12329  size(2);
12330  ins_encode %{
12331    Label* L = $labl$$label;
12332    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12333  %}
12334  ins_pipe( pipe_jcc );
12335  ins_short_branch(1);
12336%}
12337
12338instruct jmpLoopEndUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12339  match(CountedLoopEnd cop cmp);
12340  effect(USE labl);
12341
12342  ins_cost(300);
12343  format %{ "J$cop,us $labl\t# Loop end" %}
12344  size(2);
12345  ins_encode %{
12346    Label* L = $labl$$label;
12347    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12348  %}
12349  ins_pipe( pipe_jcc );
12350  ins_short_branch(1);
12351%}
12352
12353// Jump Direct Conditional - using unsigned comparison
12354instruct jmpConU_short(cmpOpU cop, eFlagsRegU cmp, label labl) %{
12355  match(If cop cmp);
12356  effect(USE labl);
12357
12358  ins_cost(300);
12359  format %{ "J$cop,us $labl" %}
12360  size(2);
12361  ins_encode %{
12362    Label* L = $labl$$label;
12363    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12364  %}
12365  ins_pipe( pipe_jcc );
12366  ins_short_branch(1);
12367%}
12368
12369instruct jmpConUCF_short(cmpOpUCF cop, eFlagsRegUCF cmp, label labl) %{
12370  match(If cop cmp);
12371  effect(USE labl);
12372
12373  ins_cost(300);
12374  format %{ "J$cop,us $labl" %}
12375  size(2);
12376  ins_encode %{
12377    Label* L = $labl$$label;
12378    __ jccb((Assembler::Condition)($cop$$cmpcode), *L);
12379  %}
12380  ins_pipe( pipe_jcc );
12381  ins_short_branch(1);
12382%}
12383
12384instruct jmpConUCF2_short(cmpOpUCF2 cop, eFlagsRegUCF cmp, label labl) %{
12385  match(If cop cmp);
12386  effect(USE labl);
12387
12388  ins_cost(300);
12389  format %{ $$template
12390    if ($cop$$cmpcode == Assembler::notEqual) {
12391      $$emit$$"JP,u,s   $labl\n\t"
12392      $$emit$$"J$cop,u,s   $labl"
12393    } else {
12394      $$emit$$"JP,u,s   done\n\t"
12395      $$emit$$"J$cop,u,s  $labl\n\t"
12396      $$emit$$"done:"
12397    }
12398  %}
12399  size(4);
12400  ins_encode %{
12401    Label* l = $labl$$label;
12402    if ($cop$$cmpcode == Assembler::notEqual) {
12403      __ jccb(Assembler::parity, *l);
12404      __ jccb(Assembler::notEqual, *l);
12405    } else if ($cop$$cmpcode == Assembler::equal) {
12406      Label done;
12407      __ jccb(Assembler::parity, done);
12408      __ jccb(Assembler::equal, *l);
12409      __ bind(done);
12410    } else {
12411       ShouldNotReachHere();
12412    }
12413  %}
12414  ins_pipe(pipe_jcc);
12415  ins_short_branch(1);
12416%}
12417
12418// ============================================================================
12419// Long Compare
12420//
12421// Currently we hold longs in 2 registers.  Comparing such values efficiently
12422// is tricky.  The flavor of compare used depends on whether we are testing
12423// for LT, LE, or EQ.  For a simple LT test we can check just the sign bit.
12424// The GE test is the negated LT test.  The LE test can be had by commuting
12425// the operands (yielding a GE test) and then negating; negate again for the
12426// GT test.  The EQ test is done by ORcc'ing the high and low halves, and the
12427// NE test is negated from that.
12428
12429// Due to a shortcoming in the ADLC, it mixes up expressions like:
12430// (foo (CmpI (CmpL X Y) 0)) and (bar (CmpI (CmpL X 0L) 0)).  Note the
12431// difference between 'Y' and '0L'.  The tree-matches for the CmpI sections
12432// are collapsed internally in the ADLC's dfa-gen code.  The match for
12433// (CmpI (CmpL X Y) 0) is silently replaced with (CmpI (CmpL X 0L) 0) and the
12434// foo match ends up with the wrong leaf.  One fix is to not match both
12435// reg-reg and reg-zero forms of long-compare.  This is unfortunate because
12436// both forms beat the trinary form of long-compare and both are very useful
12437// on Intel which has so few registers.
12438
12439// Manifest a CmpL result in an integer register.  Very painful.
12440// This is the test to avoid.
12441instruct cmpL3_reg_reg(eSIRegI dst, eRegL src1, eRegL src2, eFlagsReg flags ) %{
12442  match(Set dst (CmpL3 src1 src2));
12443  effect( KILL flags );
12444  ins_cost(1000);
12445  format %{ "XOR    $dst,$dst\n\t"
12446            "CMP    $src1.hi,$src2.hi\n\t"
12447            "JLT,s  m_one\n\t"
12448            "JGT,s  p_one\n\t"
12449            "CMP    $src1.lo,$src2.lo\n\t"
12450            "JB,s   m_one\n\t"
12451            "JEQ,s  done\n"
12452    "p_one:\tINC    $dst\n\t"
12453            "JMP,s  done\n"
12454    "m_one:\tDEC    $dst\n"
12455     "done:" %}
12456  ins_encode %{
12457    Label p_one, m_one, done;
12458    __ xorptr($dst$$Register, $dst$$Register);
12459    __ cmpl(HIGH_FROM_LOW($src1$$Register), HIGH_FROM_LOW($src2$$Register));
12460    __ jccb(Assembler::less,    m_one);
12461    __ jccb(Assembler::greater, p_one);
12462    __ cmpl($src1$$Register, $src2$$Register);
12463    __ jccb(Assembler::below,   m_one);
12464    __ jccb(Assembler::equal,   done);
12465    __ bind(p_one);
12466    __ incrementl($dst$$Register);
12467    __ jmpb(done);
12468    __ bind(m_one);
12469    __ decrementl($dst$$Register);
12470    __ bind(done);
12471  %}
12472  ins_pipe( pipe_slow );
12473%}
12474
12475//======
12476// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12477// compares.  Can be used for LE or GT compares by reversing arguments.
12478// NOT GOOD FOR EQ/NE tests.
12479instruct cmpL_zero_flags_LTGE( flagsReg_long_LTGE flags, eRegL src, immL0 zero ) %{
12480  match( Set flags (CmpL src zero ));
12481  ins_cost(100);
12482  format %{ "TEST   $src.hi,$src.hi" %}
12483  opcode(0x85);
12484  ins_encode( OpcP, RegReg_Hi2( src, src ) );
12485  ins_pipe( ialu_cr_reg_reg );
12486%}
12487
12488// Manifest a CmpL result in the normal flags.  Only good for LT or GE
12489// compares.  Can be used for LE or GT compares by reversing arguments.
12490// NOT GOOD FOR EQ/NE tests.
12491instruct cmpL_reg_flags_LTGE( flagsReg_long_LTGE flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12492  match( Set flags (CmpL src1 src2 ));
12493  effect( TEMP tmp );
12494  ins_cost(300);
12495  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12496            "MOV    $tmp,$src1.hi\n\t"
12497            "SBB    $tmp,$src2.hi\t! Compute flags for long compare" %}
12498  ins_encode( long_cmp_flags2( src1, src2, tmp ) );
12499  ins_pipe( ialu_cr_reg_reg );
12500%}
12501
12502// Long compares reg < zero/req OR reg >= zero/req.
12503// Just a wrapper for a normal branch, plus the predicate test.
12504instruct cmpL_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, label labl) %{
12505  match(If cmp flags);
12506  effect(USE labl);
12507  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12508  expand %{
12509    jmpCon(cmp,flags,labl);    // JLT or JGE...
12510  %}
12511%}
12512
12513// Compare 2 longs and CMOVE longs.
12514instruct cmovLL_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, eRegL src) %{
12515  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12516  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12517  ins_cost(400);
12518  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12519            "CMOV$cmp $dst.hi,$src.hi" %}
12520  opcode(0x0F,0x40);
12521  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12522  ins_pipe( pipe_cmov_reg_long );
12523%}
12524
12525instruct cmovLL_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegL dst, load_long_memory src) %{
12526  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12527  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12528  ins_cost(500);
12529  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12530            "CMOV$cmp $dst.hi,$src.hi" %}
12531  opcode(0x0F,0x40);
12532  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12533  ins_pipe( pipe_cmov_reg_long );
12534%}
12535
12536// Compare 2 longs and CMOVE ints.
12537instruct cmovII_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, rRegI src) %{
12538  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12539  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12540  ins_cost(200);
12541  format %{ "CMOV$cmp $dst,$src" %}
12542  opcode(0x0F,0x40);
12543  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12544  ins_pipe( pipe_cmov_reg );
12545%}
12546
12547instruct cmovII_mem_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, rRegI dst, memory src) %{
12548  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12549  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12550  ins_cost(250);
12551  format %{ "CMOV$cmp $dst,$src" %}
12552  opcode(0x0F,0x40);
12553  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12554  ins_pipe( pipe_cmov_mem );
12555%}
12556
12557// Compare 2 longs and CMOVE ints.
12558instruct cmovPP_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, eRegP dst, eRegP src) %{
12559  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge ));
12560  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12561  ins_cost(200);
12562  format %{ "CMOV$cmp $dst,$src" %}
12563  opcode(0x0F,0x40);
12564  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12565  ins_pipe( pipe_cmov_reg );
12566%}
12567
12568// Compare 2 longs and CMOVE doubles
12569instruct cmovDDPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regDPR dst, regDPR src) %{
12570  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12571  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12572  ins_cost(200);
12573  expand %{
12574    fcmovDPR_regS(cmp,flags,dst,src);
12575  %}
12576%}
12577
12578// Compare 2 longs and CMOVE doubles
12579instruct cmovDD_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regD dst, regD src) %{
12580  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12581  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12582  ins_cost(200);
12583  expand %{
12584    fcmovD_regS(cmp,flags,dst,src);
12585  %}
12586%}
12587
12588instruct cmovFFPR_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regFPR dst, regFPR src) %{
12589  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12590  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12591  ins_cost(200);
12592  expand %{
12593    fcmovFPR_regS(cmp,flags,dst,src);
12594  %}
12595%}
12596
12597instruct cmovFF_reg_LTGE(cmpOp cmp, flagsReg_long_LTGE flags, regF dst, regF src) %{
12598  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::lt || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ge );
12599  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12600  ins_cost(200);
12601  expand %{
12602    fcmovF_regS(cmp,flags,dst,src);
12603  %}
12604%}
12605
12606//======
12607// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12608instruct cmpL_zero_flags_EQNE( flagsReg_long_EQNE flags, eRegL src, immL0 zero, rRegI tmp ) %{
12609  match( Set flags (CmpL src zero ));
12610  effect(TEMP tmp);
12611  ins_cost(200);
12612  format %{ "MOV    $tmp,$src.lo\n\t"
12613            "OR     $tmp,$src.hi\t! Long is EQ/NE 0?" %}
12614  ins_encode( long_cmp_flags0( src, tmp ) );
12615  ins_pipe( ialu_reg_reg_long );
12616%}
12617
12618// Manifest a CmpL result in the normal flags.  Only good for EQ/NE compares.
12619instruct cmpL_reg_flags_EQNE( flagsReg_long_EQNE flags, eRegL src1, eRegL src2 ) %{
12620  match( Set flags (CmpL src1 src2 ));
12621  ins_cost(200+300);
12622  format %{ "CMP    $src1.lo,$src2.lo\t! Long compare; set flags for low bits\n\t"
12623            "JNE,s  skip\n\t"
12624            "CMP    $src1.hi,$src2.hi\n\t"
12625     "skip:\t" %}
12626  ins_encode( long_cmp_flags1( src1, src2 ) );
12627  ins_pipe( ialu_cr_reg_reg );
12628%}
12629
12630// Long compare reg == zero/reg OR reg != zero/reg
12631// Just a wrapper for a normal branch, plus the predicate test.
12632instruct cmpL_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, label labl) %{
12633  match(If cmp flags);
12634  effect(USE labl);
12635  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12636  expand %{
12637    jmpCon(cmp,flags,labl);    // JEQ or JNE...
12638  %}
12639%}
12640
12641// Compare 2 longs and CMOVE longs.
12642instruct cmovLL_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, eRegL src) %{
12643  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12644  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12645  ins_cost(400);
12646  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12647            "CMOV$cmp $dst.hi,$src.hi" %}
12648  opcode(0x0F,0x40);
12649  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12650  ins_pipe( pipe_cmov_reg_long );
12651%}
12652
12653instruct cmovLL_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegL dst, load_long_memory src) %{
12654  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12655  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12656  ins_cost(500);
12657  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12658            "CMOV$cmp $dst.hi,$src.hi" %}
12659  opcode(0x0F,0x40);
12660  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12661  ins_pipe( pipe_cmov_reg_long );
12662%}
12663
12664// Compare 2 longs and CMOVE ints.
12665instruct cmovII_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, rRegI src) %{
12666  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12667  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12668  ins_cost(200);
12669  format %{ "CMOV$cmp $dst,$src" %}
12670  opcode(0x0F,0x40);
12671  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12672  ins_pipe( pipe_cmov_reg );
12673%}
12674
12675instruct cmovII_mem_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, rRegI dst, memory src) %{
12676  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12677  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12678  ins_cost(250);
12679  format %{ "CMOV$cmp $dst,$src" %}
12680  opcode(0x0F,0x40);
12681  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12682  ins_pipe( pipe_cmov_mem );
12683%}
12684
12685// Compare 2 longs and CMOVE ints.
12686instruct cmovPP_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, eRegP dst, eRegP src) %{
12687  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne ));
12688  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12689  ins_cost(200);
12690  format %{ "CMOV$cmp $dst,$src" %}
12691  opcode(0x0F,0x40);
12692  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12693  ins_pipe( pipe_cmov_reg );
12694%}
12695
12696// Compare 2 longs and CMOVE doubles
12697instruct cmovDDPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regDPR dst, regDPR src) %{
12698  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12699  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12700  ins_cost(200);
12701  expand %{
12702    fcmovDPR_regS(cmp,flags,dst,src);
12703  %}
12704%}
12705
12706// Compare 2 longs and CMOVE doubles
12707instruct cmovDD_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regD dst, regD src) %{
12708  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12709  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12710  ins_cost(200);
12711  expand %{
12712    fcmovD_regS(cmp,flags,dst,src);
12713  %}
12714%}
12715
12716instruct cmovFFPR_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regFPR dst, regFPR src) %{
12717  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12718  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12719  ins_cost(200);
12720  expand %{
12721    fcmovFPR_regS(cmp,flags,dst,src);
12722  %}
12723%}
12724
12725instruct cmovFF_reg_EQNE(cmpOp cmp, flagsReg_long_EQNE flags, regF dst, regF src) %{
12726  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::eq || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::ne );
12727  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12728  ins_cost(200);
12729  expand %{
12730    fcmovF_regS(cmp,flags,dst,src);
12731  %}
12732%}
12733
12734//======
12735// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12736// Same as cmpL_reg_flags_LEGT except must negate src
12737instruct cmpL_zero_flags_LEGT( flagsReg_long_LEGT flags, eRegL src, immL0 zero, rRegI tmp ) %{
12738  match( Set flags (CmpL src zero ));
12739  effect( TEMP tmp );
12740  ins_cost(300);
12741  format %{ "XOR    $tmp,$tmp\t# Long compare for -$src < 0, use commuted test\n\t"
12742            "CMP    $tmp,$src.lo\n\t"
12743            "SBB    $tmp,$src.hi\n\t" %}
12744  ins_encode( long_cmp_flags3(src, tmp) );
12745  ins_pipe( ialu_reg_reg_long );
12746%}
12747
12748// Manifest a CmpL result in the normal flags.  Only good for LE or GT compares.
12749// Same as cmpL_reg_flags_LTGE except operands swapped.  Swapping operands
12750// requires a commuted test to get the same result.
12751instruct cmpL_reg_flags_LEGT( flagsReg_long_LEGT flags, eRegL src1, eRegL src2, rRegI tmp ) %{
12752  match( Set flags (CmpL src1 src2 ));
12753  effect( TEMP tmp );
12754  ins_cost(300);
12755  format %{ "CMP    $src2.lo,$src1.lo\t! Long compare, swapped operands, use with commuted test\n\t"
12756            "MOV    $tmp,$src2.hi\n\t"
12757            "SBB    $tmp,$src1.hi\t! Compute flags for long compare" %}
12758  ins_encode( long_cmp_flags2( src2, src1, tmp ) );
12759  ins_pipe( ialu_cr_reg_reg );
12760%}
12761
12762// Long compares reg < zero/req OR reg >= zero/req.
12763// Just a wrapper for a normal branch, plus the predicate test
12764instruct cmpL_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, label labl) %{
12765  match(If cmp flags);
12766  effect(USE labl);
12767  predicate( _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt || _kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le );
12768  ins_cost(300);
12769  expand %{
12770    jmpCon(cmp,flags,labl);    // JGT or JLE...
12771  %}
12772%}
12773
12774// Compare 2 longs and CMOVE longs.
12775instruct cmovLL_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, eRegL src) %{
12776  match(Set dst (CMoveL (Binary cmp flags) (Binary dst src)));
12777  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12778  ins_cost(400);
12779  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12780            "CMOV$cmp $dst.hi,$src.hi" %}
12781  opcode(0x0F,0x40);
12782  ins_encode( enc_cmov(cmp), RegReg_Lo2( dst, src ), enc_cmov(cmp), RegReg_Hi2( dst, src ) );
12783  ins_pipe( pipe_cmov_reg_long );
12784%}
12785
12786instruct cmovLL_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegL dst, load_long_memory src) %{
12787  match(Set dst (CMoveL (Binary cmp flags) (Binary dst (LoadL src))));
12788  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12789  ins_cost(500);
12790  format %{ "CMOV$cmp $dst.lo,$src.lo\n\t"
12791            "CMOV$cmp $dst.hi,$src.hi+4" %}
12792  opcode(0x0F,0x40);
12793  ins_encode( enc_cmov(cmp), RegMem(dst, src), enc_cmov(cmp), RegMem_Hi(dst, src) );
12794  ins_pipe( pipe_cmov_reg_long );
12795%}
12796
12797// Compare 2 longs and CMOVE ints.
12798instruct cmovII_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, rRegI src) %{
12799  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12800  match(Set dst (CMoveI (Binary cmp flags) (Binary dst src)));
12801  ins_cost(200);
12802  format %{ "CMOV$cmp $dst,$src" %}
12803  opcode(0x0F,0x40);
12804  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12805  ins_pipe( pipe_cmov_reg );
12806%}
12807
12808instruct cmovII_mem_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, rRegI dst, memory src) %{
12809  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12810  match(Set dst (CMoveI (Binary cmp flags) (Binary dst (LoadI src))));
12811  ins_cost(250);
12812  format %{ "CMOV$cmp $dst,$src" %}
12813  opcode(0x0F,0x40);
12814  ins_encode( enc_cmov(cmp), RegMem( dst, src ) );
12815  ins_pipe( pipe_cmov_mem );
12816%}
12817
12818// Compare 2 longs and CMOVE ptrs.
12819instruct cmovPP_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, eRegP dst, eRegP src) %{
12820  predicate(VM_Version::supports_cmov() && ( _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt ));
12821  match(Set dst (CMoveP (Binary cmp flags) (Binary dst src)));
12822  ins_cost(200);
12823  format %{ "CMOV$cmp $dst,$src" %}
12824  opcode(0x0F,0x40);
12825  ins_encode( enc_cmov(cmp), RegReg( dst, src ) );
12826  ins_pipe( pipe_cmov_reg );
12827%}
12828
12829// Compare 2 longs and CMOVE doubles
12830instruct cmovDDPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regDPR dst, regDPR src) %{
12831  predicate( UseSSE<=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12832  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12833  ins_cost(200);
12834  expand %{
12835    fcmovDPR_regS(cmp,flags,dst,src);
12836  %}
12837%}
12838
12839// Compare 2 longs and CMOVE doubles
12840instruct cmovDD_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regD dst, regD src) %{
12841  predicate( UseSSE>=2 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12842  match(Set dst (CMoveD (Binary cmp flags) (Binary dst src)));
12843  ins_cost(200);
12844  expand %{
12845    fcmovD_regS(cmp,flags,dst,src);
12846  %}
12847%}
12848
12849instruct cmovFFPR_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regFPR dst, regFPR src) %{
12850  predicate( UseSSE==0 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12851  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12852  ins_cost(200);
12853  expand %{
12854    fcmovFPR_regS(cmp,flags,dst,src);
12855  %}
12856%}
12857
12858
12859instruct cmovFF_reg_LEGT(cmpOp_commute cmp, flagsReg_long_LEGT flags, regF dst, regF src) %{
12860  predicate( UseSSE>=1 && _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::le || _kids[0]->_kids[0]->_leaf->as_Bool()->_test._test == BoolTest::gt );
12861  match(Set dst (CMoveF (Binary cmp flags) (Binary dst src)));
12862  ins_cost(200);
12863  expand %{
12864    fcmovF_regS(cmp,flags,dst,src);
12865  %}
12866%}
12867
12868
12869// ============================================================================
12870// Procedure Call/Return Instructions
12871// Call Java Static Instruction
12872// Note: If this code changes, the corresponding ret_addr_offset() and
12873//       compute_padding() functions will have to be adjusted.
12874instruct CallStaticJavaDirect(method meth) %{
12875  match(CallStaticJava);
12876  effect(USE meth);
12877
12878  ins_cost(300);
12879  format %{ "CALL,static " %}
12880  opcode(0xE8); /* E8 cd */
12881  ins_encode( pre_call_resets,
12882              Java_Static_Call( meth ),
12883              call_epilog,
12884              post_call_FPU );
12885  ins_pipe( pipe_slow );
12886  ins_alignment(4);
12887%}
12888
12889// Call Java Dynamic Instruction
12890// Note: If this code changes, the corresponding ret_addr_offset() and
12891//       compute_padding() functions will have to be adjusted.
12892instruct CallDynamicJavaDirect(method meth) %{
12893  match(CallDynamicJava);
12894  effect(USE meth);
12895
12896  ins_cost(300);
12897  format %{ "MOV    EAX,(oop)-1\n\t"
12898            "CALL,dynamic" %}
12899  opcode(0xE8); /* E8 cd */
12900  ins_encode( pre_call_resets,
12901              Java_Dynamic_Call( meth ),
12902              call_epilog,
12903              post_call_FPU );
12904  ins_pipe( pipe_slow );
12905  ins_alignment(4);
12906%}
12907
12908// Call Runtime Instruction
12909instruct CallRuntimeDirect(method meth) %{
12910  match(CallRuntime );
12911  effect(USE meth);
12912
12913  ins_cost(300);
12914  format %{ "CALL,runtime " %}
12915  opcode(0xE8); /* E8 cd */
12916  // Use FFREEs to clear entries in float stack
12917  ins_encode( pre_call_resets,
12918              FFree_Float_Stack_All,
12919              Java_To_Runtime( meth ),
12920              post_call_FPU );
12921  ins_pipe( pipe_slow );
12922%}
12923
12924// Call runtime without safepoint
12925instruct CallLeafDirect(method meth) %{
12926  match(CallLeaf);
12927  effect(USE meth);
12928
12929  ins_cost(300);
12930  format %{ "CALL_LEAF,runtime " %}
12931  opcode(0xE8); /* E8 cd */
12932  ins_encode( pre_call_resets,
12933              FFree_Float_Stack_All,
12934              Java_To_Runtime( meth ),
12935              Verify_FPU_For_Leaf, post_call_FPU );
12936  ins_pipe( pipe_slow );
12937%}
12938
12939instruct CallLeafNoFPDirect(method meth) %{
12940  match(CallLeafNoFP);
12941  effect(USE meth);
12942
12943  ins_cost(300);
12944  format %{ "CALL_LEAF_NOFP,runtime " %}
12945  opcode(0xE8); /* E8 cd */
12946  ins_encode(Java_To_Runtime(meth));
12947  ins_pipe( pipe_slow );
12948%}
12949
12950
12951// Return Instruction
12952// Remove the return address & jump to it.
12953instruct Ret() %{
12954  match(Return);
12955  format %{ "RET" %}
12956  opcode(0xC3);
12957  ins_encode(OpcP);
12958  ins_pipe( pipe_jmp );
12959%}
12960
12961// Tail Call; Jump from runtime stub to Java code.
12962// Also known as an 'interprocedural jump'.
12963// Target of jump will eventually return to caller.
12964// TailJump below removes the return address.
12965instruct TailCalljmpInd(eRegP_no_EBP jump_target, eBXRegP method_oop) %{
12966  match(TailCall jump_target method_oop );
12967  ins_cost(300);
12968  format %{ "JMP    $jump_target \t# EBX holds method oop" %}
12969  opcode(0xFF, 0x4);  /* Opcode FF /4 */
12970  ins_encode( OpcP, RegOpc(jump_target) );
12971  ins_pipe( pipe_jmp );
12972%}
12973
12974
12975// Tail Jump; remove the return address; jump to target.
12976// TailCall above leaves the return address around.
12977instruct tailjmpInd(eRegP_no_EBP jump_target, eAXRegP ex_oop) %{
12978  match( TailJump jump_target ex_oop );
12979  ins_cost(300);
12980  format %{ "POP    EDX\t# pop return address into dummy\n\t"
12981            "JMP    $jump_target " %}
12982  opcode(0xFF, 0x4);  /* Opcode FF /4 */
12983  ins_encode( enc_pop_rdx,
12984              OpcP, RegOpc(jump_target) );
12985  ins_pipe( pipe_jmp );
12986%}
12987
12988// Create exception oop: created by stack-crawling runtime code.
12989// Created exception is now available to this handler, and is setup
12990// just prior to jumping to this handler.  No code emitted.
12991instruct CreateException( eAXRegP ex_oop )
12992%{
12993  match(Set ex_oop (CreateEx));
12994
12995  size(0);
12996  // use the following format syntax
12997  format %{ "# exception oop is in EAX; no code emitted" %}
12998  ins_encode();
12999  ins_pipe( empty );
13000%}
13001
13002
13003// Rethrow exception:
13004// The exception oop will come in the first argument position.
13005// Then JUMP (not call) to the rethrow stub code.
13006instruct RethrowException()
13007%{
13008  match(Rethrow);
13009
13010  // use the following format syntax
13011  format %{ "JMP    rethrow_stub" %}
13012  ins_encode(enc_rethrow);
13013  ins_pipe( pipe_jmp );
13014%}
13015
13016// inlined locking and unlocking
13017
13018instruct cmpFastLockRTM(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eDXRegI scr, rRegI cx1, rRegI cx2) %{
13019  predicate(Compile::current()->use_rtm());
13020  match(Set cr (FastLock object box));
13021  effect(TEMP tmp, TEMP scr, TEMP cx1, TEMP cx2, USE_KILL box);
13022  ins_cost(300);
13023  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr,$cx1,$cx2" %}
13024  ins_encode %{
13025    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13026                 $scr$$Register, $cx1$$Register, $cx2$$Register,
13027                 _counters, _rtm_counters, _stack_rtm_counters,
13028                 ((Method*)(ra_->C->method()->constant_encoding()))->method_data(),
13029                 true, ra_->C->profile_rtm());
13030  %}
13031  ins_pipe(pipe_slow);
13032%}
13033
13034instruct cmpFastLock(eFlagsReg cr, eRegP object, eBXRegP box, eAXRegI tmp, eRegP scr) %{
13035  predicate(!Compile::current()->use_rtm());
13036  match(Set cr (FastLock object box));
13037  effect(TEMP tmp, TEMP scr, USE_KILL box);
13038  ins_cost(300);
13039  format %{ "FASTLOCK $object,$box\t! kills $box,$tmp,$scr" %}
13040  ins_encode %{
13041    __ fast_lock($object$$Register, $box$$Register, $tmp$$Register,
13042                 $scr$$Register, noreg, noreg, _counters, NULL, NULL, NULL, false, false);
13043  %}
13044  ins_pipe(pipe_slow);
13045%}
13046
13047instruct cmpFastUnlock(eFlagsReg cr, eRegP object, eAXRegP box, eRegP tmp ) %{
13048  match(Set cr (FastUnlock object box));
13049  effect(TEMP tmp, USE_KILL box);
13050  ins_cost(300);
13051  format %{ "FASTUNLOCK $object,$box\t! kills $box,$tmp" %}
13052  ins_encode %{
13053    __ fast_unlock($object$$Register, $box$$Register, $tmp$$Register, ra_->C->use_rtm());
13054  %}
13055  ins_pipe(pipe_slow);
13056%}
13057
13058
13059
13060// ============================================================================
13061// Safepoint Instruction
13062instruct safePoint_poll(eFlagsReg cr) %{
13063  match(SafePoint);
13064  effect(KILL cr);
13065
13066  // TODO-FIXME: we currently poll at offset 0 of the safepoint polling page.
13067  // On SPARC that might be acceptable as we can generate the address with
13068  // just a sethi, saving an or.  By polling at offset 0 we can end up
13069  // putting additional pressure on the index-0 in the D$.  Because of
13070  // alignment (just like the situation at hand) the lower indices tend
13071  // to see more traffic.  It'd be better to change the polling address
13072  // to offset 0 of the last $line in the polling page.
13073
13074  format %{ "TSTL   #polladdr,EAX\t! Safepoint: poll for GC" %}
13075  ins_cost(125);
13076  size(6) ;
13077  ins_encode( Safepoint_Poll() );
13078  ins_pipe( ialu_reg_mem );
13079%}
13080
13081
13082// ============================================================================
13083// This name is KNOWN by the ADLC and cannot be changed.
13084// The ADLC forces a 'TypeRawPtr::BOTTOM' output type
13085// for this guy.
13086instruct tlsLoadP(eRegP dst, eFlagsReg cr) %{
13087  match(Set dst (ThreadLocal));
13088  effect(DEF dst, KILL cr);
13089
13090  format %{ "MOV    $dst, Thread::current()" %}
13091  ins_encode %{
13092    Register dstReg = as_Register($dst$$reg);
13093    __ get_thread(dstReg);
13094  %}
13095  ins_pipe( ialu_reg_fat );
13096%}
13097
13098
13099
13100//----------PEEPHOLE RULES-----------------------------------------------------
13101// These must follow all instruction definitions as they use the names
13102// defined in the instructions definitions.
13103//
13104// peepmatch ( root_instr_name [preceding_instruction]* );
13105//
13106// peepconstraint %{
13107// (instruction_number.operand_name relational_op instruction_number.operand_name
13108//  [, ...] );
13109// // instruction numbers are zero-based using left to right order in peepmatch
13110//
13111// peepreplace ( instr_name  ( [instruction_number.operand_name]* ) );
13112// // provide an instruction_number.operand_name for each operand that appears
13113// // in the replacement instruction's match rule
13114//
13115// ---------VM FLAGS---------------------------------------------------------
13116//
13117// All peephole optimizations can be turned off using -XX:-OptoPeephole
13118//
13119// Each peephole rule is given an identifying number starting with zero and
13120// increasing by one in the order seen by the parser.  An individual peephole
13121// can be enabled, and all others disabled, by using -XX:OptoPeepholeAt=#
13122// on the command-line.
13123//
13124// ---------CURRENT LIMITATIONS----------------------------------------------
13125//
13126// Only match adjacent instructions in same basic block
13127// Only equality constraints
13128// Only constraints between operands, not (0.dest_reg == EAX_enc)
13129// Only one replacement instruction
13130//
13131// ---------EXAMPLE----------------------------------------------------------
13132//
13133// // pertinent parts of existing instructions in architecture description
13134// instruct movI(rRegI dst, rRegI src) %{
13135//   match(Set dst (CopyI src));
13136// %}
13137//
13138// instruct incI_eReg(rRegI dst, immI1 src, eFlagsReg cr) %{
13139//   match(Set dst (AddI dst src));
13140//   effect(KILL cr);
13141// %}
13142//
13143// // Change (inc mov) to lea
13144// peephole %{
13145//   // increment preceeded by register-register move
13146//   peepmatch ( incI_eReg movI );
13147//   // require that the destination register of the increment
13148//   // match the destination register of the move
13149//   peepconstraint ( 0.dst == 1.dst );
13150//   // construct a replacement instruction that sets
13151//   // the destination to ( move's source register + one )
13152//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13153// %}
13154//
13155// Implementation no longer uses movX instructions since
13156// machine-independent system no longer uses CopyX nodes.
13157//
13158// peephole %{
13159//   peepmatch ( incI_eReg movI );
13160//   peepconstraint ( 0.dst == 1.dst );
13161//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13162// %}
13163//
13164// peephole %{
13165//   peepmatch ( decI_eReg movI );
13166//   peepconstraint ( 0.dst == 1.dst );
13167//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13168// %}
13169//
13170// peephole %{
13171//   peepmatch ( addI_eReg_imm movI );
13172//   peepconstraint ( 0.dst == 1.dst );
13173//   peepreplace ( leaI_eReg_immI( 0.dst 1.src 0.src ) );
13174// %}
13175//
13176// peephole %{
13177//   peepmatch ( addP_eReg_imm movP );
13178//   peepconstraint ( 0.dst == 1.dst );
13179//   peepreplace ( leaP_eReg_immI( 0.dst 1.src 0.src ) );
13180// %}
13181
13182// // Change load of spilled value to only a spill
13183// instruct storeI(memory mem, rRegI src) %{
13184//   match(Set mem (StoreI mem src));
13185// %}
13186//
13187// instruct loadI(rRegI dst, memory mem) %{
13188//   match(Set dst (LoadI mem));
13189// %}
13190//
13191peephole %{
13192  peepmatch ( loadI storeI );
13193  peepconstraint ( 1.src == 0.dst, 1.mem == 0.mem );
13194  peepreplace ( storeI( 1.mem 1.mem 1.src ) );
13195%}
13196
13197//----------SMARTSPILL RULES---------------------------------------------------
13198// These must follow all instruction definitions as they use the names
13199// defined in the instructions definitions.
13200